1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 from UserDict import UserDict
22
23 """Module to provide a cache of statistics in a database.
24
25 @organization: Zuza Software Foundation
26 @copyright: 2007 Zuza Software Foundation
27 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
28 """
29
30 from translate import __version__ as toolkitversion
31 from translate.storage import factory, base
32 from translate.misc.multistring import multistring
33 from translate.lang.common import Common
34
35 try:
36 from sqlite3 import dbapi2
37 except ImportError:
38 from pysqlite2 import dbapi2
39 import os.path
40 import re
41 import sys
42 import stat
43
# Patterns applied to a string before its words are counted.
# Raw strings keep the backslashes intact for the regex engine and avoid
# invalid-escape warnings for sequences such as "\s".
kdepluralre = re.compile(r"^_n: ")      # leading "_n: " marker
brtagre = re.compile(r"<br\s*?/?>")     # <br>, <br/>, <br />
xmltagre = re.compile(r"<[^>]+>")       # any other tag-like span
numberre = re.compile(r"\D\.\D")        # a dot between two non-digit characters

# Maps the integer unit state stored in the database to its stats key.
state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
50
52
53 string = kdepluralre.sub("", string)
54 string = brtagre.sub("\n", string)
55 string = xmltagre.sub("", string)
56 string = numberre.sub(" ", string)
57
58
59 return len(Common.words(string))
60
62 """Counts the words in the unit's source and target, taking plurals into
63 account. The target words are only counted if the unit is translated."""
64 (sourcewords, targetwords) = (0, 0)
65 if isinstance(unit.source, multistring):
66 sourcestrings = unit.source.strings
67 else:
68 sourcestrings = [unit.source or ""]
69 for s in sourcestrings:
70 sourcewords += wordcount(s)
71 if not unit.istranslated():
72 return sourcewords, targetwords
73 if isinstance(unit.target, multistring):
74 targetstrings = unit.target.strings
75 else:
76 targetstrings = [unit.target or ""]
77 for s in targetstrings:
78 targetwords += wordcount(s)
79 return sourcewords, targetwords
80
82 - def __init__(self, record_keys, record_values=None, compute_derived_values = lambda x: x):
89
91 return tuple(self[key] for key in self.record_keys)
92
99
106
109
110 UNTRANSLATED, TRANSLATED, FUZZY = 0, 1, 2
118
120 keys = ['translatedsourcewords',
121 'fuzzysourcewords',
122 'untranslatedsourcewords',
123 'translated',
124 'fuzzy',
125 'untranslated',
126 'translatedtargetwords']
127
130
132 self.cur = cur
133 self.cur.execute("""
134 CREATE TABLE IF NOT EXISTS filetotals(
135 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
136 translatedsourcewords INTEGER NOT NULL,
137 fuzzysourcewords INTEGER NOT NULL,
138 untranslatedsourcewords INTEGER NOT NULL,
139 translated INTEGER NOT NULL,
140 fuzzy INTEGER NOT NULL,
141 untranslated INTEGER NOT NULL,
142 translatedtargetwords INTEGER NOT NULL);""")
143
def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
    """Build a fresh totals record, optionally seeded with a single unit.

    @param state_for_db: one of UNTRANSLATED, TRANSLATED or FUZZY, or None
        to leave every counter at its initial value
    @param sourcewords: source word count credited to the unit's state
    @param targetwords: target word count; only stored for TRANSLATED
    @return: a Record built over cls.keys
    """
    record = Record(cls.keys, compute_derived_values = cls._compute_derived_values)
    # Compare states by value: identity ("is") on integers only works by
    # accident of the interpreter's small-integer caching.
    if state_for_db == UNTRANSLATED:
        record['untranslated'] = 1
        record['untranslatedsourcewords'] = sourcewords
    elif state_for_db == TRANSLATED:
        record['translated'] = 1
        record['translatedsourcewords'] = sourcewords
        record['translatedtargetwords'] = targetwords
    elif state_for_db == FUZZY:
        record['fuzzy'] = 1
        record['fuzzysourcewords'] = sourcewords
    return record
158
159 new_record = classmethod(new_record)
160
162 record["total"] = record["untranslated"] + \
163 record["translated"] + \
164 record["fuzzy"]
165 record["totalsourcewords"] = record["untranslatedsourcewords"] + \
166 record["translatedsourcewords"] + \
167 record["fuzzysourcewords"]
168 record["review"] = 0
169 _compute_derived_values = classmethod(_compute_derived_values)
170
177
179 self.cur.execute("""
180 INSERT OR REPLACE into filetotals
181 VALUES (%(fileid)d, %(vals)s);
182 """ % {'fileid': fileid, 'vals': record.as_string_for_db()})
183
185 self.cur.execute("""
186 DELETE FROM filetotals
187 WHERE fileid=?;
188 """, (fileid,))
189
191 """Returns a dictionary with all statistics initalised to 0."""
192 return FileTotals.new_record()
193
196
198 return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
199
201 return {"sourcewordcount": [], "targetwordcount": []}
202
203
204
205
206
207
208
210 file_stat = os.stat(file_path)
211 assert not stat.S_ISDIR(file_stat.st_mode)
212 return file_stat.st_mtime, file_stat.st_size
213
215 return os.path.extsep + 'pending'
216
219
221 """An object instantiated as a singleton for each statsfile that provides
222 access to the database cache from a pool of StatsCache objects."""
223 _caches = {}
224 defaultfile = None
225 con = None
226 """This cache's connection"""
227 cur = None
228 """The current cursor"""
229
def make_database(statsfile):
    """Create, register and initialise the cache object for statsfile.

    The new object is stored in cls._caches under statsfile. If the
    database was written by an older toolkit build, the file is deleted
    and a fresh connection is opened before the tables are created.
    """
    def connect(cache):
        """Open a connection and cursor on statsfile for this cache."""
        cache.con = dbapi2.connect(statsfile)
        cache.cur = cache.con.cursor()

    def clear_old_data(cache):
        """Delete the database file if an older toolkit build wrote it.

        Returns True when the file was removed (the caller must then
        reconnect), False otherwise.
        """
        try:
            cache.cur.execute("""SELECT toolkitbuild FROM files""")
            val = cache.cur.fetchone()
        except dbapi2.OperationalError:
            # The query failed (for instance the table does not exist yet),
            # so there is nothing to clear.
            return False
        if val is None or not (val[0] < toolkitversion.build):
            return False
        # Release the database handle before removing the file: merely
        # dropping a local reference leaves the connection open, which
        # leaks the handle and makes the unlink fail on some platforms.
        cache.cur.close()
        cache.con.close()
        os.unlink(statsfile)
        return True

    cache = cls._caches[statsfile] = object.__new__(cls)
    connect(cache)
    if clear_old_data(cache):
        connect(cache)
    cache.create()
    return cache
255
256 if not statsfile:
257 if not cls.defaultfile:
258 userdir = os.path.expanduser("~")
259 cachedir = None
260 if os.name == "nt":
261 cachedir = os.path.join(userdir, "Translate Toolkit")
262 else:
263 cachedir = os.path.join(userdir, ".translate_toolkit")
264 if not os.path.exists(cachedir):
265 os.mkdir(cachedir)
266 cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
267 statsfile = cls.defaultfile
268 else:
269 statsfile = os.path.realpath(statsfile)
270
271 if statsfile in cls._caches:
272 return cls._caches[statsfile]
273
274 return make_database(statsfile)
275
277 """Create all tables and indexes."""
278 self.file_totals = FileTotals(self.cur)
279
280 self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
281 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
282 path VARCHAR NOT NULL UNIQUE,
283 st_mtime INTEGER NOT NULL,
284 st_size INTEGER NOT NULL,
285 toolkitbuild INTEGER NOT NULL);""")
286
287 self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
288 ON files (path);""")
289
290 self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
291 id INTEGER PRIMARY KEY AUTOINCREMENT,
292 unitid VARCHAR NOT NULL,
293 fileid INTEGER NOT NULL,
294 unitindex INTEGER NOT NULL,
295 source VARCHAR NOT NULL,
296 target VARCHAR,
297 state INTEGER,
298 sourcewords INTEGER,
299 targetwords INTEGER);""")
300
301 self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
302 ON units(fileid);""")
303
304 self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
305 configid INTEGER PRIMARY KEY AUTOINCREMENT,
306 config VARCHAR);""")
307
308 self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
309 ON checkerconfigs(config);""")
310
311 self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
312 errorid INTEGER PRIMARY KEY AUTOINCREMENT,
313 unitindex INTEGER NOT NULL,
314 fileid INTEGER NOT NULL,
315 configid INTEGER NOT NULL,
316 name VARCHAR NOT NULL,
317 message VARCHAR);""")
318
319 self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
320 ON uniterrors(fileid, configid);""")
321
322 self.con.commit()
323
def _getfileid(self, filename, check_mod_info=True, store=None, errors_return_empty=False):
    """Return the fileid of the given file, (re)caching the file if needed.

    If a record exists and its stored modification info matches the file
    on disk, the stored fileid is returned. If the record is missing or
    out of date, the store is cached again and the new fileid returned.

    @param filename: the filename to retrieve the id for
    @param check_mod_info: if False, an existing record is trusted and
        its stored modification info is simply refreshed; a missing
        record is then an error
    @param store: an already parsed store to use instead of loading the
        file when it has to be cached
    @param errors_return_empty: if True, return -1 instead of raising
        when the file cannot be inspected or parsed
    @return: the fileid, or -1 on error when errors_return_empty is set
    @raise AssertionError: if check_mod_info is False and no record
        exists (only when errors_return_empty is not set)
    """
    realpath = os.path.realpath(filename)
    self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
            WHERE path=?;""", (realpath,))
    filerow = self.cur.fetchone()
    try:
        mod_info = get_mod_info(realpath)
        if filerow:
            fileid = filerow[0]
            if not check_mod_info:
                # Trust the existing record; just refresh its mod_info.
                self.cur.execute("""UPDATE files
                        SET st_mtime=?, st_size=?
                        WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
                return fileid
            if (filerow[1], filerow[2]) == mod_info:
                return fileid

        # No usable record. The mod_info check may only be skipped when a
        # row already exists. Raise explicitly instead of using "assert",
        # which is removed when Python runs with -O.
        if not check_mod_info:
            raise AssertionError(
                "no cached record for %s; cannot skip the mod_info check" % realpath)
        store = store or factory.getobject(realpath)
        return self._cachestore(store, realpath, mod_info)
    except (base.ParseError, IOError, OSError, AssertionError):
        if errors_return_empty:
            return -1
        raise
361
363 """See if this checker configuration has been used before."""
364 config = str(checker.config.__dict__)
365 self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
366 config=?;""", (config,))
367 configrow = self.cur.fetchone()
368 if not configrow or configrow[1] != config:
369 return None
370 else:
371 return configrow[0]
372
374 """Cache the statistics for the supplied unit(s)."""
375 unitvalues = []
376 for index, unit in enumerate(units):
377 if unit.istranslatable():
378 sourcewords, targetwords = wordsinunit(unit)
379 if unitindex:
380 index = unitindex
381
382 unitvalues.append((unit.getid(), fileid, index, \
383 unit.source, unit.target, \
384 sourcewords, targetwords, \
385 statefordb(unit)))
386 file_totals_record = file_totals_record + FileTotals.new_record(statefordb(unit), sourcewords, targetwords)
387
388 self.cur.executemany("""INSERT INTO units
389 (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
390 values (?, ?, ?, ?, ?, ?, ?, ?);""",
391 unitvalues)
392 self.file_totals[fileid] = file_totals_record
393 self.con.commit()
394 if unitindex:
395 return state_strings[statefordb(units[0])]
396 return ""
397
399 """Calculates and caches the statistics of the given store
400 unconditionally."""
401 self.cur.execute("""DELETE FROM files WHERE
402 path=?;""", (realpath,))
403 self.cur.execute("""INSERT INTO files
404 (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
405 (realpath, mod_info[0], mod_info[1], toolkitversion.build))
406 fileid = self.cur.lastrowid
407 self.cur.execute("""DELETE FROM units WHERE
408 fileid=?""", (fileid,))
409 self._cacheunitstats(store.units, fileid)
410 return fileid
411
413 """Retrieves the statistics for the given file if possible, otherwise
414 delegates to cachestore()."""
415 fileid = None
416 if not fileid:
417 try:
418 fileid = self._getfileid(filename)
419 except ValueError, e:
420 print >> sys.stderr, str(e)
421 return {}
422 return self.file_totals[fileid]
423
425 """Helper method for cachestorechecks() and recacheunit()"""
426
427
428 dummy = (-1, fileid, configid, "noerror", "")
429 unitvalues = [dummy]
430
431 errornames = []
432 for index, unit in enumerate(units):
433 if unit.istranslatable():
434
435 if unitindex:
436 index = unitindex
437 failures = checker.run_filters(unit)
438 for checkname, checkmessage in failures.iteritems():
439 unitvalues.append((index, fileid, configid, checkname, checkmessage))
440 errornames.append("check-" + checkname)
441 checker.setsuggestionstore(None)
442
443 if unitindex:
444
445
446 unitvalues.remove(dummy)
447 errornames.append("total")
448
449
450 self.cur.executemany("""INSERT INTO uniterrors
451 (unitindex, fileid, configid, name, message)
452 values (?, ?, ?, ?, ?);""",
453 unitvalues)
454 self.con.commit()
455 return errornames
456
458 """Calculates and caches the error statistics of the given store
459 unconditionally."""
460
461
462 self.cur.execute("""DELETE FROM uniterrors WHERE
463 fileid=?;""", (fileid,))
464 self._cacheunitschecks(store.units, fileid, configid, checker)
465 return fileid
466
468 values = self.cur.execute("""
469 SELECT state, sourcewords, targetwords
470 FROM units
471 WHERE fileid=? AND unitid=?
472 """, (fileid, unitid))
473 return values.fetchone()
474
476 """Recalculate all information for a specific unit. This is necessary
477 for updating all statistics when a translation of a unit took place,
478 for example.
479
480 This method assumes that everything was up to date before (file totals,
481 checks, checker config, etc."""
482 fileid = self._getfileid(filename, check_mod_info=False)
483 configid = self._getstoredcheckerconfig(checker)
484 unitid = unit.getid()
485
486 totals_without_unit = self.file_totals[fileid] - \
487 FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
488 self.cur.execute("""SELECT unitindex FROM units WHERE
489 fileid=? AND unitid=?;""", (fileid, unitid))
490 unitindex = self.cur.fetchone()[0]
491 self.cur.execute("""DELETE FROM units WHERE
492 fileid=? AND unitid=?;""", (fileid, unitid))
493 state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
494
495 self.cur.execute("""DELETE FROM uniterrors WHERE
496 fileid=? AND unitindex=?;""", (fileid, unitindex))
497 if os.path.exists(suggestion_filename(filename)):
498 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
499 state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
500 return state
501
def _checkerrors(self, filename, fileid, configid, checker, store):
    """Return the first stored check error row together with the cursor.

    The uniterrors table is queried for this file and checker
    configuration. When nothing is stored yet, the checks are computed
    via cachestorechecks() and the query is repeated.

    @return: a (first_row, cursor) tuple; the remaining rows can be
        fetched from the cursor
    """
    def query_errors():
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors WHERE fileid=? and configid=?
            ORDER BY unitindex;""", (fileid, configid))
        return self.cur.fetchone(), self.cur

    found = query_errors()
    if found[0] is not None:
        return found

    # Nothing stored for this file/config pair yet: run the checks
    # first, then query again.
    if not store:
        store = factory.getobject(filename)
    pending = suggestion_filename(filename)
    if os.path.exists(pending):
        suggestions = factory.getobject(pending, ignore=suggestion_extension())
        checker.setsuggestionstore(suggestions)
    self.cachestorechecks(fileid, store, checker, configid)
    return query_errors()
522
523 - def _geterrors(self, filename, fileid, configid, checker, store):
524 result = []
525 first, cur = self._checkerrors(filename, fileid, configid, checker, store)
526 result.append(first)
527 result.extend(cur.fetchall())
528 return result
529
531 configid = self._getstoredcheckerconfig(checker)
532 if configid:
533 return configid
534 self.cur.execute("""INSERT INTO checkerconfigs
535 (configid, config) values (NULL, ?);""",
536 (str(checker.config.__dict__),))
537 return self.cur.lastrowid
538
def filechecks(self, filename, checker, store=None):
    """Return the check failures of the given file, grouped by check.

    The result maps 'check-<name>' to the list of unit indices that
    failed that check. Missing error data is computed and cached on the
    way. If looking up the file or checker configuration raises
    ValueError, the message goes to stderr and an empty result is
    returned.
    """
    try:
        fileid = self._getfileid(filename, store=store)
        configid = self._get_config_id(fileid, checker)
    except ValueError:
        # sys.exc_info() fetches the active exception without relying on
        # version-specific "except ... , e" / "as e" syntax.
        sys.stderr.write(str(sys.exc_info()[1]) + "\n")
        return emptyfilechecks()

    rows = self._geterrors(filename, fileid, configid, checker, store)

    errors = emptyfilechecks()
    for row in rows:
        unitindex = row[1]
        if unitindex == -1:
            # Rows with unit index -1 are placeholders, not failures.
            continue
        checkkey = 'check-' + row[0]
        if checkkey not in errors:
            errors[checkkey] = []
        errors[checkkey].append(unitindex)

    return errors
563
565 fileid = self._getfileid(filename)
566 configid = self._get_config_id(fileid, checker)
567 self._checkerrors(filename, fileid, configid, checker, None)
568 self.cur.execute("""SELECT
569 name,
570 unitindex
571 FROM uniterrors
572 WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
573 return self.cur.fetchone() is not None
574
def filestats(self, filename, checker, store=None):
    """Return a dictionary mapping property names to lists of the unit
    indices that have those properties.

    The check results from filechecks() are merged in first; then every
    cached unit of the file is listed under its state name and under
    "total".
    """
    stats = emptyfilestats()
    stats.update(self.filechecks(filename, checker, store))

    fileid = self._getfileid(filename, store=store)
    self.cur.execute("""SELECT
        state,
        unitindex
        FROM units WHERE fileid=?
        ORDER BY unitindex;""", (fileid,))

    for row in self.cur.fetchall():
        unitindex = row[1]
        stats[state_strings[row[0]]].append(unitindex)
        stats["total"].append(unitindex)

    return stats
595
def unitstats(self, filename, _lang=None, store=None):
    """Return a dictionary of property names mapping to arrays which
    hold one value per cached unit, in unitindex order.

    Note the difference from filestats: filestats lists the unit
    indices that have a given property, whereas this method lists the
    property values (source and target word counts) unit by unit.
    The _lang argument is not used here.
    """
    stats = emptyunitstats()

    fileid = self._getfileid(filename, store=store)
    self.cur.execute("""SELECT
        sourcewords, targetwords
        FROM units WHERE fileid=?
        ORDER BY unitindex;""", (fileid,))

    rows = self.cur.fetchall()
    stats["sourcewordcount"].extend([row[0] for row in rows])
    stats["targetwordcount"].extend([row[1] for row in rows])

    return stats
621