presage 0.9.2~beta
databaseConnector.cpp
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#include "databaseConnector.h"
26
27#include "../../core/utility.h"
28
29#include <list>
30#include <sstream>
31#include <stdlib.h>
32#include <assert.h>
33#include <boost/algorithm/string/replace.hpp>
34
35DatabaseConnector::DatabaseConnector(const std::string database_name,
36 const size_t cardinality,
37 const bool read_write)
38 : logger("DatabaseConnector", std::cerr)
39{
40 set_database_filename (database_name);
42 set_read_write_mode (read_write);
43}
44
45DatabaseConnector::DatabaseConnector(const std::string database_name,
46 const size_t cardinality,
47 const bool read_write,
48 const std::string& log_level)
49 : logger("DatabaseConnector", std::cerr, log_level)
50{
51 set_database_filename (database_name);
53 set_read_write_mode (read_write);
54}
55
57{}
58
59void DatabaseConnector::createNgramTable(const size_t n) const
60{
61 if (n > 0) {
62 std::stringstream query;
63 std::stringstream unique;
64 query << "CREATE TABLE";
65// This #ifdef does not belong here, but unfortunately SQLite 2.x does
66// not support the IF NOT EXISTS SQL clause.
67#ifndef HAVE_SQLITE_H
68 query << " IF NOT EXISTS";
69#endif
70 query << " _" << n << "_gram (";
71 for (int i = n - 1; i >= 0; i--) {
72 if (i != 0) {
73 unique << "word_" << i << ", ";
74 query << "word_" << i << " TEXT, ";
75 } else {
76 unique << "word";
77 query << "word TEXT, count INTEGER, UNIQUE(" << unique.str() << ") );";
78 }
79 }
80
81 executeSql(query.str());
82 } else {
83 // TODO
84 // throw exception
85 }
86}
87
89{
90 std::string query = "SELECT SUM(count) FROM _1_gram;";
91
92 NgramTable result = executeSql(query);
93
94 logger << DEBUG << "NgramTable:";
95 for (size_t i = 0; i < result.size(); i++) {
96 for (size_t j = 0; j < result[i].size(); j++) {
97 logger << DEBUG << result[i][j] << '\t';
98 }
99 logger << DEBUG << endl;
100 }
101
102 return extractFirstInteger(result);
103}
104
106{
107 std::stringstream query;
108 query << "SELECT count "
109 << "FROM _" << ngram.size() << "_gram"
110 << buildWhereClause(ngram) << ";";
111
112 NgramTable result = executeSql(query.str());
113
114 logger << DEBUG << "NgramTable:";
115 for (size_t i = 0; i < result.size(); i++) {
116 for (size_t j = 0; j < result[i].size(); j++) {
117 logger << DEBUG << result[i][j] << '\t';
118 }
119 logger << DEBUG << endl;
120 }
121
122 return extractFirstInteger(result);
123}
124
125NgramTable DatabaseConnector::getNgramLikeTable(const Ngram ngram, const char** filter, const int count_threshold, int limit) const
126{
127 std::stringstream query;
128 query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
129 << "FROM _" << ngram.size() << "_gram"
130 << buildWhereLikeClause(ngram, filter, count_threshold)
131 << " ORDER BY count DESC";
132 if (limit < 0) {
133 query << ";";
134 } else {
135 query << " LIMIT " << limit << ';';
136 }
137
138 return executeSql(query.str());
139}
140
142{
143 int count = getNgramCount(ngram);
144
145 if (count > 0) {
146 // the ngram was found in the database
147 updateNgram(ngram, ++count);
148
149 logger << DEBUG << "Updated ngram to " << count << endl;
150
151 } else {
152 // the ngram was not found in the database
153 count = 1;
154 insertNgram(ngram, count);
155
156 logger << DEBUG << "Inserted ngram" << endl;
157
158 }
159 return count;
160}
161
163{}
164
165void DatabaseConnector::insertNgram(const Ngram ngram, const int count) const
166{
167 std::stringstream query;
168
169 query << "INSERT INTO _" << ngram.size() << "_gram "
170 << buildValuesClause(ngram, count)
171 << ";";
172
173 executeSql(query.str());
174}
175
176void DatabaseConnector::updateNgram(const Ngram ngram, const int count) const
177{
178 std::stringstream query;
179
180 query << "UPDATE _" << ngram.size() << "_gram "
181 << "SET count = " << count
182 << buildWhereClause(ngram) << ";";
183
184 executeSql(query.str());
185}
186
187std::string DatabaseConnector::buildWhereClause(const Ngram ngram) const
188{
189 std::stringstream where_clause;
190 where_clause << " WHERE";
191 for (size_t i = 0; i < ngram.size(); i++) {
192 if (i < ngram.size() - 1) {
193 where_clause << " word_" << ngram.size() - i - 1 << " = '"
194 << sanitizeString(ngram[i]) << "' AND";
195 } else {
196 where_clause << " word = '" << sanitizeString(ngram[ngram.size() - 1]) << "'";
197 }
198 }
199 return where_clause.str();
200}
201
203 const char** filter,
204 const int count_threshold) const
205{
206 std::stringstream where_clause;
207 where_clause << " WHERE";
208 for (size_t i = 0; i < ngram.size(); i++) {
209 if (i < ngram.size() - 1) {
210 where_clause << " word_" << ngram.size() - i - 1 << " = '"
211 << sanitizeString(ngram[i]) << "' AND";
212 } else {
213 if(filter == 0)
214 where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
215 else {
216 std::string true_prefix = sanitizeString(ngram[ngram.size() - 1]);
217 where_clause << " (";
218 for (int j = 0; filter[j] != 0; j++) {
219 if (j) {
220 where_clause << " OR ";
221 }
222 where_clause << " word LIKE '" << true_prefix << filter[j] << "%'";
223 }
224 where_clause << ')';
225 }
226 if (count_threshold > 0) {
227 where_clause << " AND count >= " << count_threshold;
228 }
229 }
230 }
231 return where_clause.str();
232}
233
234
235std::string DatabaseConnector::buildSelectLikeClause(const int cardinality) const
236{
237 assert(cardinality > 0);
238
239 std::stringstream result;
240 for (int i = cardinality - 1; i >= 0; i--) {
241 if (i != 0) {
242 result << "word_" << i << ", ";
243 } else {
244 result << "word, count";
245 }
246 }
247
248 return result.str();
249}
250
251std::string DatabaseConnector::buildValuesClause(const Ngram ngram, const int count) const
252{
253 std::stringstream values_clause;
254 values_clause << "VALUES(";
255 for (size_t i = 0; i < ngram.size(); i++) {
256 if (i < ngram.size() - 1) {
257 values_clause << "'" << sanitizeString(ngram[i]) << "', ";
258 } else {
259 values_clause << "'" << sanitizeString(ngram[i]) << "', " << count << ")";
260 }
261 }
262 return values_clause.str();
263}
264
265std::string DatabaseConnector::sanitizeString(const std::string str) const
266{
267 // Escape single quotes
268 return boost::replace_all_copy(str, "'", "''");
269}
270
272{
273 // Initialize count to zero and then check that we have at least
274 // an entry in the table of ngram counts returned by the
275 // executeSql() method. If so, convert it into an integer and
276 // return it.
277 //
278 // REVISIT: make conversion to integer more robust (strtol ??)
279 //
280 int count = 0;
281 if (table.size() > 0) {
282 if (table[0].size() > 0) {
283 count = atoi(table[0][0].c_str());
284 }
285 }
286
287 logger << DEBUG << "table: ";
288 for (size_t i = 0; i < table.size(); i++) {
289 for (size_t j = 0; j < table[i].size(); j++) {
290 logger << DEBUG << table[i][j] << '\t';
291 }
292 logger << DEBUG << endl;
293 }
294
295 return (count > 0 ? count : 0);
296}
297
299{
300 executeSql("BEGIN TRANSACTION;");
301}
302
304{
305 executeSql("END TRANSACTION;");
306}
307
309{
310 executeSql("ROLLBACK TRANSACTION;");
311}
312
314{
315 return database_filename;
316}
317
318std::string DatabaseConnector::set_database_filename (const std::string& filename)
319{
320 std::string prev_filename = database_filename;
321
323
324 // make an attempt at determining whether directory where language
325 // model database is located exists and try to create it if it
326 // does not... only cater for one directory level to create it.
327 //
328 std::string dir = Utility::dirname (database_filename);
329 if (! dir.empty()) {
330 // check that specified directory exists and accessible
331 if (! Utility::is_directory_usable (dir)) {
332 // create it if not
334 }
335 }
336
337 return prev_filename;
338}
339
340std::string DatabaseConnector::expand_variables (std::string filepath) const
341{
342 // scan the filepath for variables, which follow the same pattern
343 // as shell variables - strings enclosed in '${' and '}'
344 //
345 const std::string start_marker = "${";
346 const std::string end_marker = "}";
347
348 std::list<std::string> variables;
349
350 std::string::size_type pos_start = filepath.find (start_marker);
351 while (pos_start != std::string::npos)
352 {
353 std::string::size_type pos_end = filepath.find (end_marker, pos_start);
354 if (pos_end != std::string::npos) {
355 variables.push_back (filepath.substr(pos_start + start_marker.size(), pos_end - end_marker.size() - pos_start - 1));
356 }
357
358 pos_start = filepath.find (start_marker, pos_end);
359 }
360
361 for (std::list<std::string>::const_iterator it = variables.begin();
362 it != variables.end();
363 it++)
364 {
365 substitute_variable_in_string(*it, filepath);
366 }
367
368 return filepath;
369}
370
371void DatabaseConnector::substitute_variable_in_string (const std::string& variable_name, std::string& filepath) const
372{
373 std::string variable_token = "${" + variable_name + "}";
374
375 for (std::string::size_type pos = filepath.find (variable_token);
376 pos != std::string::npos;
377 pos = filepath.find (variable_token, pos))
378 {
379 const char* value = getenv(variable_name.c_str());
380 if (value)
381 {
382 filepath.replace (pos,
383 variable_token.size(),
384 value);
385 }
386 else
387 {
388 // handle "special" variables
389 if (variable_name == "HOME")
390 {
391 value = getenv("USERPROFILE");
392 if (value)
393 {
394 filepath.replace (pos,
395 variable_token.size(),
396 value);
397 }
398 }
399 else
400 {
401 // FIXME: maybe throw exception instead of leaving
402 // variable name in string?
403 //
404 filepath.replace (pos,
405 variable_token.size(),
406 variable_name);
407 }
408 }
409 }
410}
411
413{
414 cardinality = card;
415}
416
418{
419 return cardinality;
420}
421
422void DatabaseConnector::set_read_write_mode (const bool read_write)
423{
424 read_write_mode = read_write;
425}
426
428{
429 return read_write_mode;
430}
void set_read_write_mode(const bool read_write)
virtual void endTransaction() const
void createNgramTable(const size_t cardinality) const
virtual NgramTable executeSql(const std::string query) const =0
virtual void beginTransaction() const
void substitute_variable_in_string(const std::string &variable_name, std::string &filepath) const
std::string buildWhereLikeClause(const Ngram ngram, const char **filter, const int count_threshold) const
void removeNgram(const Ngram ngram) const
virtual void rollbackTransaction() const
std::string buildValuesClause(const Ngram ngram, const int count) const
int incrementNgramCount(const Ngram ngram) const
size_t get_cardinality() const
void insertNgram(const Ngram ngram, const int count) const
int extractFirstInteger(const NgramTable &) const
std::string database_filename
std::string buildSelectLikeClause(const int cardinality) const
std::string buildWhereClause(const Ngram ngram) const
std::string set_database_filename(const std::string &filename)
int getUnigramCountsSum() const
int getNgramCount(const Ngram ngram) const
void updateNgram(const Ngram ngram, const int count) const
NgramTable getNgramLikeTable(const Ngram ngram, const char **filter, const int count_threshold, int limit=-1) const
std::string sanitizeString(const std::string) const
std::string expand_variables(std::string filename) const
std::string get_database_filename() const
bool get_read_write_mode() const
void set_cardinality(const size_t cardinality)
DatabaseConnector(const std::string database_name, const size_t cardinality, const bool read_write)
Logger< char > logger
Definition: ngram.h:33
static void create_directory(const std::string &dir)
Definition: utility.cpp:330
static std::string dirname(const std::string &)
Definition: utility.cpp:275
static bool is_directory_usable(const std::string &dir)
Definition: utility.cpp:307
std::vector< Ngram > NgramTable
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
Definition: logger.h:278