Commit d9cc33cf authored by nkour's avatar nkour

log system rewrite to use sqlite database instead of plain ascii files. this...

Rewrite the log system to use an SQLite database instead of plain ASCII files. This allows us to scale better (be faster), provide search in history, and save logs for JIDs that are non-ASCII. PLEASE read http://trac.gajim.org/wiki/MigrateLogToDot9DB
parent 767dc426
......@@ -4,7 +4,7 @@ Welcome and thanks for trying out Gajim.
python2.4 (python2.3 should work too)
pygtk2.6 or higher
python-libglade
python-pysqlite2
pysqlite2 (aka. python-pysqlite2)
Some distros also split the Python standard library into too many packages.
I know SUSE does. On such distros you also need python-xml.
......
......@@ -29,6 +29,7 @@ src/systraywin32.py
src/tabbed_chat_window.py
src/tooltips.py
src/vcard.py
src/common/check_paths.py
src/common/GnuPG.py
src/common/GnuPGInterface.py
src/common/__init__.py
......
......@@ -175,10 +175,3 @@ def visit(arg, dirname, filenames):
f.write('You can always run the migration script to import your old logs to the database\n')
f.write('Thank you\n')
f.close()
# after huge import create the indices (they are slow on massive insert)
cur.executescript(
'''
CREATE UNIQUE INDEX jids_already_index ON jids (jid);
CREATE INDEX jid_id_index ON logs (jid_id);
'''
)
## Gajim Team:
## - Yann Le Boulanger <asterix@lagaule.org>
## - Vincent Hanquez <tab@snarc.org>
## - Nikos Kouremenos <kourem@gmail.com>
## - Travis Shirk <travis@pobox.com>
##
## Copyright (C) 2003-2005 Gajim Team
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published
## by the Free Software Foundation; version 2 only.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
import os
import sys
import stat
import gajim
import logger
import i18n
_ = i18n._
Q_ = i18n.Q_
from pysqlite2 import dbapi2 as sqlite # DO NOT MOVE ABOVE OF import gajim
def create_log_db():
print _('creating logs database')
con = sqlite.connect(logger.LOG_DB_PATH)
cur = con.cursor()
# create the tables
# kind can be
# status, gcstatus, gc_msg, (we only recv for those 3),
# single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent
# to meet all our needs
# logs.jid_id --> jids.jid_id but Sqlite doesn't do FK etc so it's done in python code
# jids.jid text column will be JID if TC-related, room_jid if GC-related,
# ROOM_JID/nick if pm-related.
cur.executescript(
'''
CREATE TABLE jids(
jid_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
jid TEXT UNIQUE
);
CREATE TABLE logs(
log_line_id INTEGER PRIMARY KEY AUTOINCREMENT UNIQUE,
jid_id INTEGER,
contact_name TEXT,
time INTEGER,
kind TEXT,
show TEXT,
message TEXT
);
'''
)
con.commit()
def check_and_possible_create_paths():
LOG_DB_PATH = logger.LOG_DB_PATH
VCARDPATH = gajim.VCARDPATH
dot_gajim = os.path.dirname(VCARDPATH)
if os.path.isfile(dot_gajim):
print _('%s is file but it should be a directory') % dot_gajim
print _('Gajim will now exit')
sys.exit()
elif os.path.isdir(dot_gajim):
s = os.stat(dot_gajim)
if s.st_mode & stat.S_IROTH: # others have read permission!
os.chmod(dot_gajim, 0700) # rwx------
if not os.path.exists(VCARDPATH):
print _('creating %s directory') % VCARDPATH
os.mkdir(VCARDPATH, 0700)
elif os.path.isfile(VCARDPATH):
print _('%s is file but it should be a directory') % VCARDPATH
print _('Gajim will now exit')
sys.exit()
if not os.path.exists(LOG_DB_PATH):
create_log_db()
elif os.path.isdir(LOG_DB_PATH):
print _('%s is directory but should be file') % LOG_DB_PATH
print _('Gajim will now exit')
sys.exit()
else: # dot_gajim doesn't exist
if dot_gajim: # is '' on win9x so avoid that
print _('creating %s directory') % dot_gajim
os.mkdir(dot_gajim, 0700)
if not os.path.isdir(VCARDPATH):
print _('creating %s directory') % VCARDPATH
os.mkdir(VCARDPATH, 0700)
if not os.path.isfile(LOG_DB_PATH):
create_log_db()
......@@ -364,12 +364,12 @@ def _messageCB(self, con, msg):
if not msg.getTag('body'): #no <body>
return
self.dispatch('GC_MSG', (frm, msgtxt, tim))
gajim.logger.write('gc', msgtxt, frm, tim = tim)
gajim.logger.write('gc_msg', frm, msgtxt, tim = tim)
elif mtype == 'normal': # it's single message
log_msgtxt = msgtxt
if subject:
log_msgtxt = _('Subject: %s\n%s') % (subject, msgtxt)
gajim.logger.write('incoming', log_msgtxt, frm, tim = tim)
gajim.logger.write('single_msg_recv', frm, log_msgtxt, tim = tim)
if invite is not None:
item = invite.getTag('invite')
jid_from = item.getAttr('from')
......@@ -387,7 +387,7 @@ def _messageCB(self, con, msg):
if subject:
log_msgtxt = _('Subject: %s\n%s') % (subject, msgtxt)
if msg.getTag('body'):
gajim.logger.write('incoming', log_msgtxt, frm, tim = tim)
gajim.logger.write('chat_msg_recv', frm, log_msgtxt, tim = tim)
self.dispatch('MSG', (frm, msgtxt, tim, encrypted, mtype, subject,
chatstate))
# END messageCB
......@@ -469,7 +469,7 @@ def _presenceCB(self, con, prs):
self.dispatch('ERROR_ANSWER', ('', jid_stripped,
errmsg, errcode))
if not ptype or ptype == 'unavailable':
gajim.logger.write('status', status, who, show)
gajim.logger.write('gcstatus', who, status, show)
self.dispatch('GC_NOTIFY', (jid_stripped, show, status, resource,
prs.getRole(), prs.getAffiliation(), prs.getJid(),
prs.getReason(), prs.getActor(), prs.getStatusCode(),
......@@ -517,7 +517,7 @@ def _presenceCB(self, con, prs):
else:
self.vcard_shas[jid_stripped] = avatar_sha
if not ptype or ptype == 'unavailable':
gajim.logger.write('status', status, jid_stripped, show)
gajim.logger.write('status', jid_stripped, status, show)
self.dispatch('NOTIFY', (jid_stripped, show, status, resource, prio,
keyID))
# END presenceCB
......@@ -1898,7 +1898,11 @@ def send_message(self, jid, msg, keyID, type = 'chat', subject='', chatstate = N
if subject:
log_msg = _('Subject: %s\n%s') % (subject, msg)
if log_msg:
gajim.logger.write('outgoing', log_msg, jid)
if type == 'chat':
kind = 'chat_msg_sent'
else:
kind = 'single_msg_sent'
gajim.logger.write(kind, jid, log_msg)
self.dispatch('MSGSENT', (jid, msg, keyID))
def send_stanza(self, stanza):
......
......@@ -23,7 +23,7 @@
import mutex
import common.config
import common.logger
interface = None # The actual interface (the gtk one for the moment)
version = '0.9'
......@@ -37,6 +37,7 @@
log = logging.getLogger('Gajim')
log.addHandler(h)
import common.logger
logger = common.logger.Logger() # init the logger
if os.name == 'nt':
......@@ -45,25 +46,21 @@
else:
DATA_DIR = os.path.join('..', 'data')
try:
# Documents and Settings\[User Name]\Application Data\Gajim\logs
# Documents and Settings\[User Name]\Application Data\Gajim
LOGPATH = os.path.join(os.environ['appdata'], 'Gajim', 'Logs') # deprecated
LOG_DB_PATH = os.path.join(os.environ['appdata'], 'Gajim', 'logs.db')
VCARDPATH = os.path.join(os.environ['appdata'], 'Gajim', 'Vcards')
except KeyError:
# win9x, ./Logs etc
# win9x, in cwd
LOGPATH = 'Logs' # deprecated
LOG_DB_PATH = 'logs.db'
VCARDPATH = 'Vcards'
else: # Unices
DATA_DIR = '../data'
LOGPATH = os.path.expanduser('~/.gajim/logs') # deprecated
LOG_DB_PATH = os.path.expanduser('~/.gajim/logs.db')
VCARDPATH = os.path.expanduser('~/.gajim/vcards')
try:
LOGPATH = LOGPATH.decode(sys.getfilesystemencoding())
VCARDPATH = VCARDPATH.decode(sys.getfilesystemencoding())
LOG_DB_PATH = LOG_DB_PATH.decode(sys.getfilesystemencoding())
except:
pass
......
......@@ -23,8 +23,10 @@
import errno
import sys
import stat
from pysqlite2 import dbapi2 as sqlite
import gajim
import logger
from common import i18n
from common.xmpp_stringprep import nodeprep, resourceprep, nameprep
......@@ -49,8 +51,8 @@ def parse_jid(jidstring):
resource = None
# Search for delimiters
user_sep = jidstring.find("@")
res_sep = jidstring.find("/")
user_sep = jidstring.find('@')
res_sep = jidstring.find('/')
if user_sep == -1:
if res_sep == -1:
......@@ -136,53 +138,6 @@ def temp_failure_retry(func, *args, **kwargs):
else:
raise
def check_paths():
LOGPATH = gajim.LOGPATH
VCARDPATH = gajim.VCARDPATH
dot_gajim = os.path.dirname(LOGPATH)
if os.path.isfile(dot_gajim):
print _('%s is file but it should be a directory') % dot_gajim
print _('Gajim will now exit')
sys.exit()
elif os.path.isdir(dot_gajim):
s = os.stat(dot_gajim)
if s.st_mode & stat.S_IROTH: # others have read permission!
os.chmod(dot_gajim, 0700) # rwx------
if not os.path.exists(LOGPATH):
print _('creating %s directory') % LOGPATH
os.mkdir(LOGPATH, 0700)
elif os.path.isfile(LOGPATH):
print _('%s is file but it should be a directory') % LOGPATH
print _('Gajim will now exit')
sys.exit()
elif os.path.isdir(LOGPATH):
s = os.stat(LOGPATH)
if s.st_mode & stat.S_IROTH: # others have read permission!
os.chmod(LOGPATH, 0700) # rwx------
if not os.path.exists(VCARDPATH):
print _('creating %s directory') % VCARDPATH
os.mkdir(VCARDPATH, 0700)
elif os.path.isfile(VCARDPATH):
print _('%s is file but it should be a directory') % VCARDPATH
print _('Gajim will now exit')
sys.exit()
elif os.path.isdir(VCARDPATH):
s = os.stat(VCARDPATH)
if s.st_mode & stat.S_IROTH: # others have read permission!
os.chmod(VCARDPATH, 0700) # rwx------
else: # dot_gajim doesn't exist
if dot_gajim: # is '' on win9x so avoid that
print _('creating %s directory') % dot_gajim
os.mkdir(dot_gajim, 0700)
if not os.path.isdir(LOGPATH):
print _('creating %s directory') % LOGPATH
os.mkdir(LOGPATH, 0700)
if not os.path.isdir(VCARDPATH):
print _('creating %s directory') % VCARDPATH
os.mkdir(VCARDPATH, 0700)
def convert_bytes(string):
suffix = ''
# IEC standard says KiB = 1024 bytes KB = 1000 bytes
......@@ -452,7 +407,7 @@ def play_sound(event):
return
if not os.path.exists(path_to_soundfile):
return
if os.name == 'nt':
if os.name == 'nt':
try:
winsound.PlaySound(path_to_soundfile,
winsound.SND_FILENAME|winsound.SND_ASYNC)
......
......@@ -18,202 +18,217 @@
##
import os
import sys
import time
import datetime
import common.gajim
from common import i18n
_ = i18n._
import helpers
try:
    from pysqlite2 import dbapi2 as sqlite
except ImportError:
    # the logger cannot work without the sqlite bindings: tell the user
    # which package to install and stop here
    error = _('pysqlite2 (aka python-pysqlite2) dependency is missing. '
        'After you install pysqlite2, if you want to migrate your logs '
        'to the new database, please read: http://trac.gajim.org/wiki/MigrateLogToDot9DB '
        'Exiting...'
        )
    print >> sys.stderr, error
    sys.exit()
# becomes True once the jids table has been read from the database
GOT_JIDS_ALREADY_IN_DB = False

# work out where the logs database lives for this platform
if os.name == 'nt':
    try:
        # Documents and Settings\[User Name]\Application Data\Gajim\logs.db
        LOG_DB_PATH = os.path.join(os.environ['appdata'], 'Gajim', 'logs.db')
    except KeyError:
        # win9x, ./logs.db
        LOG_DB_PATH = 'logs.db'
else: # Unices
    LOG_DB_PATH = os.path.expanduser('~/.gajim/logs.db')

try:
    LOG_DB_PATH = LOG_DB_PATH.decode(sys.getfilesystemencoding())
except Exception:
    # best effort only: keep the undecoded path when decoding is not possible
    # (Exception instead of a bare except so that SystemExit and
    # KeyboardInterrupt are not swallowed)
    pass
class Logger:
def __init__(self):
    '''Read the jids already stored in the logs database, if it exists.'''
    # the db can be missing only the first time (the time we create it);
    # it is created in src/common/check_paths.py, so there is nothing to
    # read yet in that case
    if os.path.exists(LOG_DB_PATH):
        self.get_jids_already_in_db()
def write(self, kind, msg, jid, show = None, tim = None):
def get_jids_already_in_db(self):
    '''Load every jid of the jids table into self.jids_already_in (a list)
    and set the module-level GOT_JIDS_ALREADY_IN_DB flag to True.'''
    # without this declaration the assignment at the end only binds a local
    # name, the module flag stays False and write() reloads the whole table
    # on every call
    global GOT_JIDS_ALREADY_IN_DB
    con = sqlite.connect(LOG_DB_PATH)
    try:
        cur = con.cursor()
        cur.execute('SELECT jid FROM jids')
        # list of 1-tuples: [(u'aaa@bbb',), (u'cc@dd',)]
        rows = cur.fetchall()
    finally:
        con.close()
    # row[0] is the first item of the row (the only one here, the jid)
    self.jids_already_in = [row[0] for row in rows]
    GOT_JIDS_ALREADY_IN_DB = True
def jid_is_from_pm(cur, jid):
    '''Guess whether jid (e.g. gajim@conf/nkour) belongs to a private
    message exchanged inside a room.

    We cannot know whether gajim@conf is a normal contact with nkour as
    resource, so we ask if gajim@conf is already in the jids table (as a
    room). This fails if the user disabled logging for the room and only
    enabled it for pm (highly unlikely); when the guess fails no chaos
    follows: the user just sees the first pm as if it was a message in the
    room's public chat.

    cur is an open cursor on the logs database.
    Returns True when the part before the first '/' is a known jid, False
    otherwise (also when jid has no '/' at all).
    '''
    # NOTE(review): there is no self parameter; if this is meant to be a
    # Logger method the signature needs revisiting together with its callers
    if '/' not in jid:
        # a bare jid has no nick part, so it cannot be a pm jid
        return False
    possible_room_jid = jid.split('/', 1)[0]
    # let the driver bind the value: a jid containing a quote must not be
    # able to break (or alter) the statement
    cur.execute('SELECT jid_id FROM jids WHERE jid = ?', (possible_room_jid,))
    # fetchone() gives None when the room is unknown
    return cur.fetchone() is not None
def get_jid_id(self, jid):
    '''Return the jid_id that the jids table associates with jid.

    jids table has jid and jid_id
    logs table has log_line_id, jid_id, contact_name, time, kind, show,
    message
    so to query logs we need the jid_id that matches our jid in the jids
    table. A jid that is not stored yet is inserted and the id of the new
    row is returned.
    '''
    if not hasattr(self, 'jids_already_in'):
        # __init__ skips loading the jids when the db does not exist yet
        self.get_jids_already_in_db()
    con = sqlite.connect(LOG_DB_PATH)
    try:
        cur = con.cursor()
        row = None
        if jid in self.jids_already_in: # we already have this jid in DB
            # bound parameter: the jid may contain quotes
            cur.execute('SELECT jid_id FROM jids WHERE jid = ?', (jid,))
            row = cur.fetchone()
        if row is not None:
            jid_id = row[0]
        else: # oh! a new jid :), we add him now
            cur.execute('INSERT INTO jids (jid) VALUES (?)', (jid,))
            con.commit()
            jid_id = cur.lastrowid
            if jid not in self.jids_already_in:
                self.jids_already_in.append(jid)
    finally:
        # a connection is opened on every call, so always give it back
        con.close()
    return jid_id
def write(self, kind, jid, message = None, show = None, tim = None):
'''write a row (status, gcstatus, message etc) to logs database
kind can be status, gcstatus, gc_msg, (we only recv for those 3),
single_msg_recv, chat_msg_recv, chat_msg_sent, single_msg_sent
we cannot know if it is pm or normal chat message, we try to guess
see jid_is_from_pm()
we analyze jid and store it as follows:
jids.jid text column will hold JID if TC-related, room_jid if GC-related,
ROOM_JID/nick if pm-related.'''
if not GOT_JIDS_ALREADY_IN_DB:
self.get_jids_already_in_db()
con = sqlite.connect(LOG_DB_PATH)
cur = con.cursor()
jid = jid.lower()
if not tim:
tim = time.time()
contact_name_col = None # holds nickname for kinds gcstatus, gc_msg
# message holds the message unless kind is status or gcstatus,
# then it holds status message
message_col = message
show_col = show
if tim:
time_col = int(float(time.mktime(tim)))
else:
tim = time.mktime(tim)
time_col = int(float(time.time()))
if not msg:
msg = ''
def commit_to_db(values, cur = cur):
sql = 'INSERT INTO logs (jid_id, contact_name, time, kind, show, message) '\
'VALUES (?, ?, ?, ?, ?, ?)'
cur.execute(sql, values)
cur.connection.commit()
jid_id = self.get_jid_id(jid)
if kind == 'status': # we store (not None) time, jid, show, msg
# status for roster items
if show is None:
show_col = 'online'
msg = helpers.to_one_line(msg)
if len(jid.split('/')) > 1:
ji, nick = jid.split('/', 1)
else:
ji = jid
nick = ''
files = []
if kind == 'status': # we save time:jid:show:msg
if not show:
show = 'online'
if common.gajim.config.get('log_notif_in_user_file'):
path_to_file = os.path.join(common.gajim.LOGPATH, ji)
if os.path.isdir(path_to_file):
jid = 'gcstatus'
msg = show + ':' + msg
show = nick
files.append(ji + '/' + ji)
if os.path.isfile(jid):
files.append(jid)
else:
files.append(ji)
if common.gajim.config.get('log_notif_in_sep_file'):
files.append('notify.log')
elif kind == 'incoming': # we save time:recv:message
path_to_file = os.path.join(common.gajim.LOGPATH, ji)
if os.path.isdir(path_to_file):
files.append(jid)
else:
files.append(ji)
jid = 'recv'
show = msg
msg = ''
elif kind == 'outgoing': # we save time:sent:message
path_to_file = os.path.join(common.gajim.LOGPATH, ji)
if os.path.isdir(path_to_file):
files.append(jid)
values = (jid_id, contact_name_col, time_col, kind, show_col, message_col)
commit_to_db(values)
elif kind == 'gcstatus':
# status in ROOM (for pm status see status)
if show is None:
show_col = 'online'
jid, nick = jid.split('/', 1)
jid_id = self.get_jid_id(jid) # re-get jid_id for the new jid
contact_name_col = nick
values = (jid_id, contact_name_col, time_col, kind, show_col, message_col)
commit_to_db(values)
elif kind == 'gc_msg':
if jid.find('/') != -1: # if it has a /
jid, nick = jid.split('/', 1)
else:
files.append(ji)
jid = 'sent'
show = msg
msg = ''
elif kind == 'gc': # we save time:gc:nick:message
# create the folder if needed
ji_fn = os.path.join(common.gajim.LOGPATH, ji)
if os.path.isfile(ji_fn):
os.remove(ji_fn)
if not os.path.isdir(ji_fn):
os.mkdir(ji_fn, 0700)
files.append(ji + '/' + ji)
jid = 'gc'
show = nick
# convert to utf8 before writing to file if needed
if isinstance(tim, unicode):
tim = tim.encode('utf-8')
if isinstance(jid, unicode):
jid = jid.encode('utf-8')
if isinstance(show, unicode):
show = show.encode('utf-8')
if msg and isinstance(msg, unicode):
msg = msg.encode('utf-8')
for f in files:
path_to_file = os.path.join(common.gajim.LOGPATH, f)
if os.path.isdir(path_to_file):
return
# this does it rw-r-r by default but is in a dir with 700 so it's ok
fil = open(path_to_file, 'a')
fil.write('%s:%s:%s' % (tim, jid, show))
if msg:
fil.write(':' + msg)
fil.write('\n')
fil.close()
def __get_path_to_file(self, fjid):
jid = fjid.split('/')[0]
path_to_file = os.path.join(common.gajim.LOGPATH, jid)
if os.path.isdir(path_to_file):
if fjid == jid: # we want to read the gc history
path_to_file = os.path.join(common.gajim.LOGPATH, jid + '/' + jid)
else: #we want to read pm history
path_to_file = os.path.join(common.gajim.LOGPATH, fjid)
return path_to_file
# it's server message f.e. error message
# when user tries to ban someone but he's not allowed to
nick = None
jid_id = self.get_jid_id(jid) # re-get jid_id for the new jid
contact_name_col = nick
values = (jid_id, contact_name_col, time_col, kind, show_col, message_col)
commit_to_db(values)
elif kind in ('single_msg_recv', 'chat_msg_recv', 'chat_msg_sent', 'single_msg_sent'):
values = (jid_id, contact_name_col, time_col, kind, show_col, message_col)
commit_to_db(values)
#con.close()
def get_no_of_lines(self, fjid):
'''returns total number of lines in a log file
returns 0 if log file does not exist'''
fjid = fjid.lower()
path_to_file = self.__get_path_to_file(fjid)
if not os.path.isfile(path_to_file):
return 0
f = open(path_to_file, 'r')
return len(f.readlines()) # number of lines
def get_last_conversation_lines(self, jid, restore_how_many_rows,
pending_how_many, timeout):
'''accepts how many rows to restore and when to time them out (in minutes)
(mark them as too old) and number of messages that are in queue
and are already logged but pending to be viewed,
returns a list of tupples containg time, kind, message,
list with empty tupple if nothing found to meet our demands'''
now = int(float(time.time()))
jid = jid.lower()
jid_id = self.get_jid_id(jid)
con = sqlite.connect(LOG_DB_PATH)
cur = con.cursor()
# so if we ask last 5 lines and we have 2 pending we get
# 3 - 8 (we avoid the last 2 lines but we still return 5 asked)
cur.execute('''
SELECT time, kind, message FROM logs
WHERE jid_id = %d AND kind IN
('single_msg_recv', 'chat_msg_recv', 'chat_msg_sent', 'single_msg_sent')
ORDER BY time DESC LIMIT %d OFFSET %d
''' % (jid_id, restore_how_many_rows, pending_how_many)
)
# FIXME: remove me when refactor in TC is done
def read_from_line_to_line(self, fjid, begin_from_line, end_line):
'''returns the text in the lines (list),
returns empty list if log file does not exist'''
fjid = fjid.lower()
path_to_file = self.__get_path_to_file(fjid)
if not os.path.isfile(path_to_file):
return []
results = cur.fetchall()
results.reverse()
return results
lines = []
def get_conversation_for_date(self, jid, year, month, day):
'''returns contact_name, time, kind, show, message
for each row in a list of tupples,
returns list with empty tupple if we found nothing to meet our demands'''
jid = jid.lower()
jid_id = self.get_jid_id(jid)
fil = open(path_to_file, 'r')
#fil.readlines(begin_from_line) # skip the previous lines
no_of_lines = begin_from_line # number of lines between being and end
while (no_of_lines < begin_from_line and fil.readline()):
no_of_lines += 1
# gimme unixtime from year month day:
d = datetime.date(2005, 10, 3)
local_time = d.timetuple() # time tupple (compat with time.localtime())
start_of_day = int(time.mktime(local_time)) # we have time since epoch baby :)
print begin_from_line, end_line
while no_of_lines < end_line:
line = fil.readline().decode('utf-8')
print `line`, '@', no_of_lines
if line:
line = helpers