From d35a9f6a10bf649a87f90c9354564b0fc020fb3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philipp=20H=C3=B6rist?= <forenjunkie@chello.at>
Date: Sat, 15 Sep 2018 20:45:38 +0200
Subject: [PATCH] Add a configurable threshold for MAM in MUC

---
 gajim/common/config.py      |  3 ++
 gajim/common/const.py       |  7 ++++
 gajim/common/helpers.py     | 11 +++++
 gajim/common/logger.py      | 39 ++++++++++++------
 gajim/common/modules/mam.py | 82 ++++++++++++++++++++++++++-----------
 gajim/groupchat_control.py  | 40 +++++++++++++++++-
 gajim/gtk/history_sync.py   |  2 +-
 gajim/gui_menu_builder.py   | 27 ++++++++++--
 8 files changed, 168 insertions(+), 43 deletions(-)

diff --git a/gajim/common/config.py b/gajim/common/config.py
index e172618d81..ba49e256cf 100644
--- a/gajim/common/config.py
+++ b/gajim/common/config.py
@@ -293,6 +293,9 @@ class Config:
         'pgp_encoding': [opt_str, '', _('Sets the encoding used by python-gnupg'), True],
         'remote_commands': [opt_bool, False, _('If true, Gajim will execute XEP-0146 Commands.')],
         'dark_theme': [opt_int, 2, _('2: System, 1: Enabled, 0: Disabled')],
+        'threshold_options': [opt_str, '1, 2, 4, 10, 0', _('Options in days which can be chosen in the sync threshold menu'), True],
+        'public_room_sync_threshold': [opt_int, 1, _('Maximum history in days we request from a public room archive. 0: As much as possible')],
+        'private_room_sync_threshold': [opt_int, 0, _('Maximum history in days we request from a private room archive. 0: As much as possible')],
     }, {})  # type: Tuple[Dict[str, List[Any]], Dict[Any, Any]]
 
     __options_per_key = {
diff --git a/gajim/common/const.py b/gajim/common/const.py
index 8e92af7014..ee40994449 100644
--- a/gajim/common/const.py
+++ b/gajim/common/const.py
@@ -186,6 +186,13 @@ class Chatstate(IntEnum):
         return self.name.lower()
 
 
+class SyncThreshold(IntEnum):  # Special values of the MUC history sync threshold
+    NO_THRESHOLD = 0  # 0 days: request as much history as possible
+
+    def __str__(self):  # Render the member as its plain integer, e.g. '0'
+        return str(self.value)
+
+
 ACTIVITIES = {
     'doing_chores': {
         'category': _('Doing Chores'),
diff --git a/gajim/common/helpers.py b/gajim/common/helpers.py
index 0e87ec5862..2260a2e2ca 100644
--- a/gajim/common/helpers.py
+++ b/gajim/common/helpers.py
@@ -51,6 +51,7 @@ from gajim.common import configpaths
 from gajim.common.i18n import Q_
 from gajim.common.i18n import _
 from gajim.common.i18n import ngettext
+from gajim.common.caps_cache import muc_caps_cache
 
 try:
     import precis_i18n.codec  # pylint: disable=unused-import
@@ -1481,3 +1482,13 @@ def call_counter(func):
         self._connect_machine_calls += 1
         return func(self, restart=False)
     return helper
+
+def get_sync_threshold(jid, archive_info):
+    """Return the sync threshold of a room, storing the default if unset."""
+    if archive_info is not None and archive_info.sync_threshold is not None:
+        return archive_info.sync_threshold
+    private = muc_caps_cache.supports(jid, 'muc#roomconfig_membersonly')
+    option = 'private_room_sync_threshold' if private else 'public_room_sync_threshold'
+    threshold = app.config.get(option)
+    app.logger.set_archive_infos(jid, sync_threshold=threshold)
+    return threshold
diff --git a/gajim/common/logger.py b/gajim/common/logger.py
index becb11a1dd..c4480272b5 100644
--- a/gajim/common/logger.py
+++ b/gajim/common/logger.py
@@ -78,11 +78,12 @@ LOGS_SQL_STATEMENT = '''
             jid_id INTEGER PRIMARY KEY UNIQUE,
             last_mam_id TEXT,
             oldest_mam_timestamp TEXT,
-            last_muc_timestamp TEXT
+            last_muc_timestamp TEXT,
+            sync_threshold INTEGER
     );
     CREATE INDEX idx_logs_jid_id_time ON logs (jid_id, time DESC);
     CREATE INDEX idx_logs_stanza_id ON logs (stanza_id);
-    PRAGMA user_version=1;
+    PRAGMA user_version=2;
     '''
 
 CACHE_SQL_STATEMENT = '''
@@ -214,12 +215,16 @@ class Logger:
                 '''CREATE INDEX IF NOT EXISTS idx_logs_stanza_id
                     ON logs(stanza_id)''',
                 'PRAGMA user_version=1'
-                ]
+            ]
 
             self._execute_multiple(con, statements)
 
         if self._get_user_version(con) < 2:
-            pass
+            statements = [
+                'ALTER TABLE last_archive_message ADD COLUMN "sync_threshold" INTEGER',
+                'PRAGMA user_version=2'
+            ]
+            self._execute_multiple(con, statements)
 
     def _migrate_cache(self, con):
         if self._get_user_version(con) == 0:
@@ -1394,20 +1399,20 @@ class Logger:
         self._con.execute(sql, (sha, account_jid_id, jid_id))
         self._timeout_commit()
 
-    def get_archive_timestamp(self, jid, type_=None):
+    def get_archive_infos(self, jid):
         """
-        Get the last archive id/timestamp for a jid
+        Get the stored archive infos row of a jid, or None if there is none
 
         :param jid:     The jid that belongs to the avatar
 
         """
-        jid_id = self.get_jid_id(jid, type_=type_)
+        jid_id = self.get_jid_id(jid, type_=JIDConstant.ROOM_TYPE)
         sql = '''SELECT * FROM last_archive_message WHERE jid_id = ?'''
         return self._con.execute(sql, (jid_id,)).fetchone()
 
-    def set_archive_timestamp(self, jid, **kwargs):
+    def set_archive_infos(self, jid, **kwargs):
         """
-        Set the last archive id/timestamp
+        Set archive infos
 
         :param jid:                     The jid that belongs to the avatar
 
@@ -1419,20 +1424,28 @@ class Logger:
         :param last_muc_timestamp:      The timestamp of the last message we
                                         received in a MUC
 
+        :param sync_threshold:          The max days that we request from a
+                                        MUC archive
+
         """
         jid_id = self.get_jid_id(jid)
-        exists = self.get_archive_timestamp(jid)
+        exists = self.get_archive_infos(jid)
         if not exists:
-            sql = '''INSERT INTO last_archive_message VALUES (?, ?, ?, ?)'''
+            sql = '''INSERT INTO last_archive_message
+                     (jid_id, last_mam_id, oldest_mam_timestamp,
+                      last_muc_timestamp, sync_threshold)
+                      VALUES (?, ?, ?, ?, ?)'''
             self._con.execute(sql, (
                 jid_id,
                 kwargs.get('last_mam_id', None),
                 kwargs.get('oldest_mam_timestamp', None),
-                kwargs.get('last_muc_timestamp', None)))
+                kwargs.get('last_muc_timestamp', None),
+                kwargs.get('sync_threshold', None)
+            ))
         else:
             args = ' = ?, '.join(kwargs.keys()) + ' = ?'
             sql = '''UPDATE last_archive_message SET {}
                      WHERE jid_id = ?'''.format(args)
             self._con.execute(sql, tuple(kwargs.values()) + (jid_id,))
-        log.info('Save archive timestamps: %s', kwargs)
+        log.info('Save archive infos: %s', kwargs)
         self._timeout_commit()
diff --git a/gajim/common/modules/mam.py b/gajim/common/modules/mam.py
index c91d180bc7..0db0e8561e 100644
--- a/gajim/common/modules/mam.py
+++ b/gajim/common/modules/mam.py
@@ -15,14 +15,18 @@
 # XEP-0313: Message Archive Management
 
 import logging
+import time
 from datetime import datetime, timedelta
 
 import nbxmpp
 
 from gajim.common import app
 from gajim.common.nec import NetworkIncomingEvent
-from gajim.common.const import ArchiveState, JIDConstant, KindConstant
+from gajim.common.const import ArchiveState
+from gajim.common.const import KindConstant
+from gajim.common.const import SyncThreshold
 from gajim.common.caps_cache import muc_caps_cache
+from gajim.common.helpers import get_sync_threshold
 from gajim.common.modules.misc import parse_delay
 from gajim.common.modules.misc import parse_oob
 from gajim.common.modules.misc import parse_correction
@@ -352,7 +356,7 @@ class MAM:
             log.warning('MAM request for %s already running', own_jid)
             return
 
-        archive = app.logger.get_archive_timestamp(own_jid)
+        archive = app.logger.get_archive_infos(own_jid)
 
         # Migration of last_mam_id from config to DB
         if archive is not None:
@@ -379,16 +383,12 @@ class MAM:
         self._send_archive_query(query, query_id, start_date)
 
     def request_archive_on_muc_join(self, jid):
-        archive = app.logger.get_archive_timestamp(
-            jid, type_=JIDConstant.ROOM_TYPE)
+        archive = app.logger.get_archive_infos(jid)
+        threshold = get_sync_threshold(jid, archive)
+        log.info('Threshold for %s: %s', jid, threshold)
         query_id = self._get_query_id(jid)
         start_date = None
-        if archive is not None:
-            log.info('Request from archive %s after %s:',
-                     jid, archive.last_mam_id)
-            query = self._get_archive_query(
-                query_id, jid=jid, after=archive.last_mam_id)
-        else:
+        if archive is None or archive.last_mam_id is None:
             # First Start, we dont request history
             # Depending on what a MUC saves, there could be thousands
             # of Messages even in just one day.
@@ -397,6 +397,37 @@ class MAM:
             query = self._get_archive_query(
                 query_id, jid=jid, start=start_date)
 
+        elif threshold == SyncThreshold.NO_THRESHOLD:
+            # Not our first join and no threshold set
+            log.info('Request from archive: %s, after mam-id %s',
+                     jid, archive.last_mam_id)
+            query = self._get_archive_query(
+                query_id, jid=jid, after=archive.last_mam_id)
+
+        else:
+            # Not our first join, check how much time elapsed since our
+            # last join and check against threshold
+            last_timestamp = archive.last_muc_timestamp
+            if last_timestamp is None:
+                log.info('No last muc timestamp found ( mam:1? )')
+                last_timestamp = 0
+
+            last = datetime.utcfromtimestamp(float(last_timestamp))
+            if datetime.utcnow() - last > timedelta(days=threshold):
+                # Too much time has elapsed since last join, apply threshold
+                start_date = datetime.utcnow() - timedelta(days=threshold)
+                log.info('Too much time elapsed since last join, '
+                         'request from: %s, threshold: %s',
+                         start_date, threshold)
+                query = self._get_archive_query(
+                    query_id, jid=jid, start=start_date)
+            else:
+                # Request from last mam-id
+                log.info('Request from archive %s after %s:',
+                         jid, archive.last_mam_id)
+                query = self._get_archive_query(
+                    query_id, jid=jid, after=archive.last_mam_id)
+
         if jid in self._catch_up_finished:
             self._catch_up_finished.remove(jid)
         self._send_archive_query(query, query_id, start_date, groupchat=True)
@@ -424,20 +455,22 @@ class MAM:
             return
 
         complete = fin.getAttr('complete')
-        app.logger.set_archive_timestamp(
-            jid, last_mam_id=last, last_muc_timestamp=None)
         if complete != 'true':
+            app.logger.set_archive_infos(jid, last_mam_id=last)
             self._mam_query_ids.pop(jid)
             query_id = self._get_query_id(jid)
             query = self._get_archive_query(query_id, jid=jid, after=last)
             self._send_archive_query(query, query_id, groupchat=groupchat)
         else:
             self._mam_query_ids.pop(jid)
-            if start_date is not None:
-                app.logger.set_archive_timestamp(
-                    jid,
-                    last_mam_id=last,
-                    oldest_mam_timestamp=start_date.timestamp())
+            app.logger.set_archive_infos(
+                jid, last_mam_id=last, last_muc_timestamp=time.time())
+            if start_date is not None and not groupchat:
+                # Record the earliest timestamp we request from
+                # the account archive. For the account archive we only
+                # set start_date at the very first request.
+                app.logger.set_archive_infos(
+                    jid, oldest_mam_timestamp=start_date.timestamp())
 
             self._catch_up_finished.append(jid)
             log.info('End of MAM query, last mam id: %s', last)
@@ -481,7 +514,7 @@ class MAM:
         if last is None:
             app.nec.push_incoming_event(ArchivingIntervalFinished(
                 None, query_id=query_id))
-            app.logger.set_archive_timestamp(
+            app.logger.set_archive_infos(
                 jid, oldest_mam_timestamp=timestamp)
             log.info('End of MAM request, no items retrieved')
             return
@@ -491,7 +524,7 @@ class MAM:
             self.request_archive_interval(start_date, end_date, last, query_id)
         else:
             log.info('Request finished')
-            app.logger.set_archive_timestamp(
+            app.logger.set_archive_infos(
                 jid, oldest_mam_timestamp=timestamp)
             app.nec.push_incoming_event(ArchivingIntervalFinished(
                 None, query_id=query_id))
@@ -536,15 +569,18 @@ class MAM:
         return iq
 
     def save_archive_id(self, jid, stanza_id, timestamp):
-        if stanza_id is None:
-            return
         if jid is None:
             jid = self._con.get_own_jid().getStripped()
         if jid not in self._catch_up_finished:
             return
         log.info('Save: %s: %s, %s', jid, stanza_id, timestamp)
-        app.logger.set_archive_timestamp(
-            jid, last_mam_id=stanza_id, last_muc_timestamp=timestamp)
+        if stanza_id is None:
+            # mam:1 (no stanza id): only record the timestamp
+            app.logger.set_archive_infos(jid, last_muc_timestamp=timestamp)
+        else:
+            # mam:2: record both the stanza id and the timestamp
+            app.logger.set_archive_infos(
+                jid, last_mam_id=stanza_id, last_muc_timestamp=timestamp)
 
     def request_mam_preferences(self):
         log.info('Request MAM preferences')
diff --git a/gajim/groupchat_control.py b/gajim/groupchat_control.py
index a18df86808..20281200b9 100644
--- a/gajim/groupchat_control.py
+++ b/gajim/groupchat_control.py
@@ -60,6 +60,7 @@ from gajim.common import i18n
 from gajim.common import contacts
 from gajim.common.const import StyleAttr
 from gajim.common.const import Chatstate
+
 from gajim.chat_control import ChatControl
 from gajim.chat_control_base import ChatControlBase
 
@@ -548,7 +549,7 @@ class GroupchatControl(ChatControlBase):
             ('request-voice-', self._on_request_voice),
             ('execute-command-', self._on_execute_command),
             ('upload-avatar-', self._on_upload_avatar),
-            ]
+        ]
 
         for action in actions:
             action_name, func = action
@@ -575,6 +576,17 @@ class GroupchatControl(ChatControlBase):
         act.connect('change-state', self._on_notify_on_all_messages)
         self.parent_win.window.add_action(act)
 
+        archive_info = app.logger.get_archive_infos(self.contact.jid)
+        threshold = helpers.get_sync_threshold(self.contact.jid,
+                                               archive_info)
+
+        inital = GLib.Variant.new_string(str(threshold))
+        act = Gio.SimpleAction.new_stateful(
+            'choose-sync-' + self.control_id,
+            inital.get_type(), inital)
+        act.connect('change-state', self._on_sync_threshold)
+        self.parent_win.window.add_action(act)
+
     def update_actions(self):
         if self.parent_win is None:
             return
@@ -638,6 +650,25 @@ class GroupchatControl(ChatControlBase):
         win.lookup_action('upload-avatar-' + self.control_id).set_enabled(
             self.is_connected and vcard_support and contact.affiliation == 'owner')
 
+        # Sync Threshold
+        has_mam = muc_caps_cache.has_mam(self.room_jid)
+        win.lookup_action('choose-sync-' + self.control_id).set_enabled(has_mam)
+
+    def _on_room_created(self):
+        """Refresh actions and sync threshold state after disco of a new room."""
+        if self.parent_win is None:
+            return
+        window = self.parent_win.window
+        self.update_actions()
+        # After the room has been created, reevaluate threshold
+        room_jid = self.contact.jid
+        if not muc_caps_cache.has_mam(room_jid):
+            return
+        info = app.logger.get_archive_infos(room_jid)
+        state = GLib.Variant('s', str(helpers.get_sync_threshold(room_jid, info)))
+        window.change_action_state('choose-sync-%s' % self.control_id, state)
+
+
     def _connect_window_state_change(self, parent_win):
         if self._state_change_handler_id is None:
             id_ = parent_win.window.connect('notify::is-maximized',
@@ -755,6 +786,11 @@ class GroupchatControl(ChatControlBase):
         app.config.set_per('rooms', self.contact.jid,
                            'notify_on_all_messages', param.get_boolean())
 
+    def _on_sync_threshold(self, action, param):  # 'change-state' handler
+        threshold = param.get_string()  # days, as the string menu target
+        action.set_state(param)
+        app.logger.set_archive_infos(self.contact.jid, sync_threshold=threshold)
+
     def _on_execute_command(self, action, param):
         """
         Execute AdHoc commands on the current room
@@ -1838,7 +1874,7 @@ class GroupchatControl(ChatControlBase):
                 self.print_conversation(_('Room logging is enabled'))
             if '201' in obj.status_code:
                 app.connections[self.account].get_module('Discovery').disco_muc(
-                    self.room_jid, self.update_actions, update=True)
+                    self.room_jid, self._on_room_created, update=True)
                 self.print_conversation(_('A new room has been created'))
             if '210' in obj.status_code:
                 self.print_conversation(\
diff --git a/gajim/gtk/history_sync.py b/gajim/gtk/history_sync.py
index 918da6cad7..c9f7a45948 100644
--- a/gajim/gtk/history_sync.py
+++ b/gajim/gtk/history_sync.py
@@ -53,7 +53,7 @@ class HistorySyncAssistant(Gtk.Assistant):
         own_jid = self.con.get_own_jid().getStripped()
 
         mam_start = ArchiveState.NEVER
-        archive = app.logger.get_archive_timestamp(own_jid)
+        archive = app.logger.get_archive_infos(own_jid)
         if archive is not None and archive.oldest_mam_timestamp is not None:
             mam_start = int(float(archive.oldest_mam_timestamp))
 
diff --git a/gajim/gui_menu_builder.py b/gajim/gui_menu_builder.py
index d42bb800bf..05b95fec78 100644
--- a/gajim/gui_menu_builder.py
+++ b/gajim/gui_menu_builder.py
@@ -23,6 +23,7 @@ from gajim import message_control
 from gajim.gtkgui_helpers import get_action
 from gajim.common import app
 from gajim.common import helpers
+from gajim.common.i18n import ngettext
 
 
 def build_resources_submenu(contacts, account, action, room_jid=None,
@@ -634,7 +635,8 @@ def get_groupchat_menu(control_id):
             ('win.configure-', _('Configure Room')),
             ('win.upload-avatar-', _('Upload Avatar…')),
             ('win.destroy-', _('Destroy Room')),
-            ]),
+        ]),
+        (_('Sync Threshold'), []),
         ('win.change-nick-', _('Change Nick')),
         ('win.bookmark-', _('Bookmark Room')),
         ('win.request-voice-', _('Request Voice')),
@@ -643,7 +645,7 @@ def get_groupchat_menu(control_id):
         ('win.execute-command-', _('Execute command')),
         ('win.browse-history-', _('History')),
         ('win.disconnect-', _('Disconnect')),
-        ]
+    ]
 
     def build_menu(preset):
         menu = Gio.Menu()
@@ -656,11 +658,28 @@ def get_groupchat_menu(control_id):
                     menu.append(label, action_name + control_id)
             else:
                 label, sub_menu = item
-                # This is a submenu
-                submenu = build_menu(sub_menu)
+                if not sub_menu:
+                    # Sync threshold menu
+                    submenu = build_sync_menu()
+                else:
+                    # This is a submenu
+                    submenu = build_menu(sub_menu)
                 menu.append_submenu(label, submenu)
         return menu
 
+    def build_sync_menu():
+        """Build the submenu with one entry per configured sync threshold."""
+        menu = Gio.Menu()
+        options = app.config.get('threshold_options').split(',')
+        # The option is a free-form string: skip blank or non-numeric entries
+        days = [int(opt) for opt in options if opt.strip().isdecimal()]
+        action_name = 'win.choose-sync-%s::' % control_id
+        for day in days:
+            label = (ngettext('%i day', '%i days', day, day, day)
+                     if day else _('No threshold'))
+            menu.append(label, '%s%s' % (action_name, day))
+        return menu
+
     return build_menu(groupchat_menu)
 
 
-- 
GitLab