htmltextview.py 42.6 KB
Newer Older
roidelapluie's avatar
roidelapluie committed
1
# -*- coding:utf-8 -*-
roidelapluie's avatar
roidelapluie committed
2 3 4 5 6
## src/htmltextview.py
##
## Copyright (C) 2005 Gustavo J. A. M. Carneiro
## Copyright (C) 2006 Santiago Gala
## Copyright (C) 2006-2007 Jean-Marie Traissard <jim AT lapin.org>
7
## Copyright (C) 2006-2010 Yann Leboulanger <asterix AT lagaule.org>
roidelapluie's avatar
roidelapluie committed
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## Copyright (C) 2007 Nikos Kouremenos <kourem AT gmail.com>
## Copyright (C) 2008 Jonathan Schleifer <js-gajim AT webkeks.org>
##                    Julien Pivotto <roidelapluie AT gmail.com>
##                    Stephan Erb <steve-e AT h3c.de>
##
## This file is part of Gajim.
##
## Gajim is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published
## by the Free Software Foundation; version 3 only.
##
## Gajim is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
23 24
##
## You should have received a copy of the GNU General Public License
roidelapluie's avatar
roidelapluie committed
25 26
## along with Gajim. If not, see <http://www.gnu.org/licenses/>.
##
27

28
"""
29
A gtk.TextView-based renderer for XHTML-IM, as described in:
30
  http://xmpp.org/extensions/xep-0071.html
31 32 33 34 35

Starting with the version posted by Gustavo Carneiro,
I (Santiago Gala) am trying to make it more compatible
with the markup that docutils generate, and also more
modular.
36
"""
37 38 39 40 41 42 43

import gobject
import pango
import gtk
import xml.sax, xml.sax.handler
import re
from cStringIO import StringIO
44
import socket
45
import time
46 47 48
import urllib2
import operator

49
if __name__ == '__main__':
50 51
    from common import i18n
    import common.configpaths
52
    common.configpaths.gajimpaths.init_profile()
53
    common.configpaths.gajimpaths.init(None)
54
    import gtkgui_helpers
jimpp's avatar
jimpp committed
55
from common import gajim
56
from gtkgui_helpers import get_icon_pixmap
57 58

import tooltips
59 60
import logging
log = logging.getLogger('gajim.htmlview')
61 62 63

__all__ = ['HtmlTextView']

64 65
# One or more consecutive whitespace characters (collapsed to a single space
# when text is not preformatted).
whitespace_rx = re.compile(r'\s+')
# A string that is empty or consists only of whitespace.
allwhitespace_rx = re.compile(r'^\s*$')
66

67
# pixels = points * display_resolution
# The quotient is the vertical density of the default screen in pixels per
# millimetre; 0.3514598 is 25.4 / 72.27, i.e. millimetres per point.
display_resolution = 0.3514598 * (
    gtk.gdk.screen_height() / float(gtk.gdk.screen_height_mm()))
70

71
# embryo of CSS classes: maps a class name to the style fragment appended for
# elements carrying that class
#   (disabled) 'system-message': ';display: none'
classes = dict(problematic=';color: red')

77
# styles for elements: maps an element name to the style fragment that is
# applied implicitly whenever that element is opened
element_styles = {
    'u': ';text-decoration: underline',
    'em': ';font-style: oblique',
    'cite': '; background-color:rgb(170,190,250); font-style: oblique',
    'li': '; margin-left: 1em; margin-right: 10%',
    'strong': ';font-weight: bold',
    'pre': ('; background-color:rgb(190,190,190); font-family: monospace; '
            'white-space: pre; margin-left: 1em; margin-right: 10%'),
    'kbd': ';background-color:rgb(210,210,210);font-family: monospace',
    'blockquote': ('; background-color:rgb(170,190,250); margin-left: 2em; '
                   'margin-right: 10%'),
    'dt': ';font-weight: bold; font-style: oblique',
    'dd': ';margin-left: 2em; font-style: oblique',
}
element_styles.update({
    # no difference for the moment
    'dfn': element_styles['em'],
    'var': element_styles['em'],
    # deprecated, legacy, presentational
    'tt': element_styles['kbd'],
    'i': element_styles['em'],
    'b': element_styles['strong'],
})

98
# ==========
roidelapluie's avatar
roidelapluie committed
99
#   XEP-0071
100
# ==========
101
#
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
# This Integration Set includes a subset of the modules defined for
# XHTML 1.0 but does not redefine any existing modules, nor
# does it define any new modules. Specifically, it includes the
# following modules only:
#
# - Structure
# - Text
#
#   * Block
#
#     phrasal
#        address, blockquote, pre
#     Struc
#        div,p
#     Heading
#        h1, h2, h3, h4, h5, h6
#
#   * Inline
#
#     phrasal
#        abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var
#     structural
#        br, span
#
# - Hypertext (a)
# - List (ul, ol, dl)
# - Image (img)
# - Style Attribute
#
# Therefore XHTML-IM uses the following content models:
#
#   Block.mix
#             Block-like elements, e.g., paragraphs
#   Flow.mix
#             Any block or inline elements
#   Inline.mix
#             Character-level elements
#   InlineNoAnchor.class
140
#                       Anchor element
141 142 143 144 145 146
#   InlinePre.mix
#             Pre element
#
# XHTML-IM also uses the following Attribute Groups:
#
# Core.extra.attrib
147
#       TBD
148
# I18n.extra.attrib
149
#       TBD
150
# Common.extra
151
#       style
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
#
#
# ...
# block level:
# Heading    h
#            ( pres           = h1 | h2 | h3 | h4 | h5 | h6 )
# Block      ( phrasal        = address | blockquote | pre )
# NOT           ( presentational = hr )
#            ( structural     = div | p )
# other:     section
# Inline     ( phrasal        = abbr | acronym | cite | code | dfn | em |
#                               kbd | q | samp | strong | var )
# NOT        ( presentational =  b  | big | i | small | sub | sup | tt )
#            ( structural     =  br | span )
# Param/Legacy    param, font, basefont, center, s, strike, u, dir, menu,
#                 isindex
168 169 170 171 172

# Block-level element names, grouped as in the comment above
BLOCK_HEAD = set(['h%d' % level for level in range(1, 7)])
BLOCK_PHRASAL = set(['address', 'blockquote', 'pre'])
BLOCK_PRES = set(['hr']) #not in xhtml-im
BLOCK_STRUCT = set(['div', 'p'])
BLOCK_HACKS = set(['table', 'tr']) # at the very least, they will start line ;)
BLOCK = BLOCK_HEAD | BLOCK_PHRASAL | BLOCK_STRUCT | BLOCK_PRES | BLOCK_HACKS

# Inline element names
INLINE_PHRASAL = set(['abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd',
                      'q', 'samp', 'strong', 'var'])
INLINE_PRES = set(['b', 'i', 'u', 'tt']) #not in xhtml-im
INLINE_STRUCT = set(['br', 'span'])
INLINE = INLINE_PHRASAL | INLINE_PRES | INLINE_STRUCT

# List container element names
LIST_ELEMS = set(['dl', 'ol', 'ul'])

# Implicit styles for headings: h1/h2 are large, h3/h4 medium, h5/h6 small;
# within each pair the first is bold and the second oblique.
for name in BLOCK_HEAD:
    # the heading level is the digit after the 'h'; int() is enough here,
    # there is no reason to eval() the string
    num = int(name[1])
    size, weigth = divmod(num - 1, 2)
    element_styles[name] = '; font-size: %s; %s' % (
        ('large', 'medium', 'small')[size],
        ('font-weight: bold', 'font-style: oblique')[weigth],
    )
189 190

def _parse_css_color(color):
191 192 193 194 195
    if color.startswith('rgb(') and color.endswith(')'):
        r, g, b = [int(c)*257 for c in color[4:-1].split(',')]
        return gtk.gdk.Color(r, g, b)
    else:
        return gtk.gdk.color_parse(color)
196 197

def style_iter(style):
    """
    Iterate over the declarations of a CSS style string

    Return a generator of [property, value] lists, both parts stripped of
    surrounding whitespace. Only the first ':' separates property from value,
    so values may themselves contain colons. Empty declarations and
    declarations without a ':' are skipped, so callers can always unpack
    two items.
    """
    return ([part.strip() for part in item.split(':', 1)]
            for item in style.split(';') if ':' in item)
200

201 202

class HtmlHandler(xml.sax.handler.ContentHandler):
    """
    A handler to display html to a gtk textview

    It keeps a stack of "style spans" (start/end element pairs) and a stack of
    list counters, for nested lists.
    """

    def __init__(self, conv_textview, startiter):
        """
        Render into the buffer of conv_textview.tv, starting at startiter
        """
        xml.sax.handler.ContentHandler.__init__(self)
        self.textbuf = conv_textview.tv.get_buffer()
        self.textview = conv_textview.tv
        self.iter = startiter
        self.conv_textview = conv_textview
        self.text = '' # character data collected since the last flush
        self.starting = True # nothing was written on the current line yet
        self.preserve = False # True inside <pre>: whitespace is kept
        self.styles = [] # a gtk.TextTag or None, for each span level
        self.list_counters = [] # stack (top at the end) of list
                                # counters, or None for unordered list

    def _parse_style_color(self, tag, value):
        """
        Apply the CSS 'color' property to tag
        """
        tag.set_property('foreground-gdk', _parse_css_color(value))

    def _parse_style_background_color(self, tag, value):
        """
        Apply the CSS 'background-color' property to tag (text and paragraph)
        """
        color = _parse_css_color(value)
        for propname in ('background-gdk', 'paragraph-background-gdk'):
            tag.set_property(propname, color)

    def _get_current_attributes(self):
        """
        Return the text attributes found one character before self.iter
        """
        attrs = self.textview.get_default_attributes()
        # step back, read the attributes, then restore the iter position
        self.iter.backward_char()
        self.iter.get_attributes(attrs)
        self.iter.forward_char()
        return attrs

    def __parse_length_frac_size_allocate(self, textview, allocation, frac,
                                          callback, args):
        """
        'size-allocate' handler: report frac of the allocated width
        """
        callback(allocation.width*frac, *args)

    def _parse_length(self, value, font_relative, block_relative, minl, maxl,
                      callback, *args):
        """
        Parse/calc length, converting to pixels, calls callback(length, *args)
        when the length is first computed or changes

        value is a CSS length ('10%', '12pt', '2em', '1ex', '40px' or a bare
        integer). A percentage is resolved against the current font size when
        font_relative is set, against the textview width when block_relative
        is set (and recomputed on every 'size-allocate'), and otherwise passed
        on as a plain fraction. minl and maxl bound the magnitude of
        non-percentage lengths. A malformed number in a value that ends in
        '%' or in a unit suffix raises ValueError; a malformed bare value is
        only logged.
        """
        if value.endswith('%'):
            val = float(value[:-1])
            sign = (val > 0) - (val < 0)
            # limits: 1% to 500%
            val = sign*max(1, min(abs(val), 500))
            frac = val/100
            if font_relative:
                attrs = self._get_current_attributes()
                font_size = attrs.font.get_size() / pango.SCALE
                callback(frac*display_resolution*font_size, *args)
            elif block_relative:
                # CSS says 'Percentage values: refer to width of the closest
                #           block-level ancestor'
                # This is difficult/impossible to implement, so we use
                # textview width instead; a reasonable approximation..
                alloc = self.textview.get_allocation()
                self.__parse_length_frac_size_allocate(self.textview, alloc,
                    frac, callback, args)
                self.textview.connect('size-allocate',
                    self.__parse_length_frac_size_allocate,
                    frac, callback, args)
            else:
                callback(frac, *args)
            return

        def get_val():
            # number in front of a two-letter unit, clamped to minl..maxl
            # NOTE(review): display_resolution is applied here for every
            # unit, including 'px', and again in the 'pt'/'em'/'ex' branches
            # below -- kept as is, confirm whether that is intended
            val = float(value[:-2])
            sign = (val > 0) - (val < 0)
            # validate length
            return sign*max(minl, min(abs(val*display_resolution), maxl))

        def get_font_size():
            attrs = self._get_current_attributes()
            return attrs.font.get_size() / pango.SCALE

        if value.endswith('pt'): # points
            callback(get_val()*display_resolution, *args)

        elif value.endswith(('em', 'ex')):
            # ems, the width of the element's font
            # x-height, ~ the height of the letter 'x'
            # FIXME: figure out how to calculate 'ex' correctly
            #        for now 'em' size is used as approximation
            font_size = get_font_size()
            callback(get_val()*display_resolution*font_size, *args)

        elif value.endswith('px'): # pixels
            callback(get_val(), *args)

        else:
            try:
                # TODO: isn't "no units" interpreted as pixels?
                val = int(value)
                sign = (val > 0) - (val < 0)
                # validate length
                val = sign*max(minl, min(abs(val), maxl))
                callback(val, *args)
            except Exception:
                log.warning('Unable to parse length value "%s"' % value)

    @staticmethod
    def __parse_font_size_cb(length, tag):
        """
        Length callback: set the font size of tag from a length in pixels
        """
        tag.set_property('size-points', length/display_resolution)

    def _parse_style_display(self, tag, value):
        """
        Apply the CSS 'display' property to tag; only 'none' is handled
        """
        if value == 'none':
            tag.set_property('invisible', 'true')
        # FIXME: display: block, inline

    def _parse_style_font_size(self, tag, value):
        """
        Apply the CSS 'font-size' property to tag

        Handles the absolute keywords (relative to the current font scale),
        'smaller'/'larger', and any length understood by _parse_length.
        """
        named_scales = {
            'xx-small': pango.SCALE_XX_SMALL,
            'x-small': pango.SCALE_X_SMALL,
            'small': pango.SCALE_SMALL,
            'medium': pango.SCALE_MEDIUM,
            'large': pango.SCALE_LARGE,
            'x-large': pango.SCALE_X_LARGE,
            'xx-large': pango.SCALE_XX_LARGE,
        }
        if value in named_scales:
            attrs = self._get_current_attributes()
            tag.set_property('scale', named_scales[value] / attrs.font_scale)
        elif value == 'smaller':
            tag.set_property('scale', pango.SCALE_SMALL)
        elif value == 'larger':
            tag.set_property('scale', pango.SCALE_LARGE)
        else:
            # font relative (5 ~ 4pt, 110 ~ 72pt)
            self._parse_length(value, True, False, 5, 110,
                self.__parse_font_size_cb, tag)

    def _parse_style_font_style(self, tag, value):
        """
        Apply the CSS 'font-style' property to tag
        """
        font_styles = {
            'normal': pango.STYLE_NORMAL,
            'italic': pango.STYLE_ITALIC,
            'oblique': pango.STYLE_OBLIQUE,
        }
        if value in font_styles:
            tag.set_property('style', font_styles[value])
        else:
            log.warning('unknown font-style %s' % value)

    def __frac_length_tag_cb(self, length, tag, propname):
        """
        Length callback: set propname of tag to length, added to the value
        the innermost open style tag has for the same property
        """
        styles = self._get_style_tags()
        if styles:
            length += styles[-1].get_property(propname)
        tag.set_property(propname, length)

    def _parse_style_margin_left(self, tag, value):
        """
        Apply the CSS 'margin-left' property to tag
        """
        # block relative
        self._parse_length(value, False, True, 1, 1000,
            self.__frac_length_tag_cb, tag, 'left-margin')

    def _parse_style_margin_right(self, tag, value):
        """
        Apply the CSS 'margin-right' property to tag
        """
        # block relative
        self._parse_length(value, False, True, 1, 1000,
            self.__frac_length_tag_cb, tag, 'right-margin')

    def _parse_style_font_weight(self, tag, value):
        """
        Apply the CSS 'font-weight' property to tag
        """
        # TODO: missing 'bolder' and 'lighter'
        weights = {
            '100': pango.WEIGHT_ULTRALIGHT,
            '200': pango.WEIGHT_ULTRALIGHT,
            '300': pango.WEIGHT_LIGHT,
            '400': pango.WEIGHT_NORMAL,
            '500': pango.WEIGHT_NORMAL,
            '600': pango.WEIGHT_BOLD,
            '700': pango.WEIGHT_BOLD,
            '800': pango.WEIGHT_ULTRABOLD,
            '900': pango.WEIGHT_HEAVY,
            'normal': pango.WEIGHT_NORMAL,
            'bold': pango.WEIGHT_BOLD,
        }
        if value in weights:
            tag.set_property('weight', weights[value])
        else:
            log.warning('unknown font-weight %s' % value)

    def _parse_style_font_family(self, tag, value):
        """
        Apply the CSS 'font-family' property to tag
        """
        tag.set_property('family', value)

    def _parse_style_text_align(self, tag, value):
        """
        Apply the CSS 'text-align' property to tag
        """
        justifications = {
            'left': gtk.JUSTIFY_LEFT,
            'right': gtk.JUSTIFY_RIGHT,
            'center': gtk.JUSTIFY_CENTER,
            'justify': gtk.JUSTIFY_FILL,
        }
        if value in justifications:
            tag.set_property('justification', justifications[value])
        else:
            log.warning('Invalid text-align:%s requested' % value)

    def _parse_style_text_decoration(self, tag, value):
        """
        Apply the CSS 'text-decoration' property to tag

        Underline and strikethrough are each switched on or off depending on
        whether their keyword is listed, so 'none' needs no special case.
        """
        values = value.split(' ')
        if 'underline' in values:
            tag.set_property('underline', pango.UNDERLINE_SINGLE)
        else:
            tag.set_property('underline', pango.UNDERLINE_NONE)
        tag.set_property('strikethrough', 'line-through' in values)
        if 'blink' in values:
            log.warning('text-decoration:blink not implemented')
        if 'overline' in values:
            log.warning('text-decoration:overline not implemented')

    def _parse_style_white_space(self, tag, value):
        """
        Apply the CSS 'white-space' property to tag; unknown values are ignored
        """
        wrap_modes = {
            'pre': gtk.WRAP_NONE,
            'normal': gtk.WRAP_WORD,
            'nowrap': gtk.WRAP_NONE,
        }
        if value in wrap_modes:
            tag.set_property('wrap_mode', wrap_modes[value])

    def __length_tag_cb(self, value, tag, propname):
        """
        Length callback: set propname of tag to value, logging on failure
        """
        try:
            tag.set_property(propname, value)
        except Exception:
            log.warning( "Error with prop: " + propname + " for tag: " + str(tag))

    def _parse_style_width(self, tag, value):
        """
        Apply the CSS 'width' property to tag; 'auto' is a no-op
        """
        if value == 'auto':
            return
        self._parse_length(value, False, False, 1, 1000, self.__length_tag_cb,
            tag, "width")

    def _parse_style_height(self, tag, value):
        """
        Apply the CSS 'height' property to tag; 'auto' is a no-op
        """
        if value == 'auto':
            return
        self._parse_length(value, False, False, 1, 1000, self.__length_tag_cb,
            tag, "height")

    # build a dictionary mapping styles to methods, for greater speed
    # (the values are plain functions, hence the explicit self on calls)
    __style_methods = dict()
    for style in ('background-color', 'color', 'font-family', 'font-size',
                  'font-style', 'font-weight', 'margin-left', 'margin-right',
                  'text-align', 'text-decoration', 'white-space', 'display',
                  'width', 'height' ):
        try:
            method = locals()['_parse_style_%s' % style.replace('-', '_')]
        except KeyError:
            log.warning('Style attribute "%s" not yet implemented' % style)
        else:
            __style_methods[style] = method
    del style
    # --

    def _get_style_tags(self):
        """
        Return the tags of the open spans, outermost first, without the Nones
        """
        return [tag for tag in self.styles if tag is not None]

    def _create_url(self, href, title, type_, id_):
        """
        Process a url tag

        Create the text tag for an <a> element. The tag is only made
        clickable when href is set and is not a '#...' reference.
        """
        tag = self.textbuf.create_tag(id_)
        if href and href[0] != '#':
            tag.href = href
            tag.type_ = type_ # to be used by the URL handler
            tag.connect('event', self.textview.hyperlink_handler, 'url')
            tag.set_property('foreground', gajim.config.get('urlmsgcolor'))
            tag.set_property('underline', pango.UNDERLINE_SINGLE)
            tag.is_anchor = True
        if title:
            tag.title = title
        return tag

    def _get_img(self, attrs):
        """
        Download an image. This function is launched in a separate thread

        Return (mem, alt): the raw image data and a replacement text. On any
        failure mem is empty and alt describes the problem, so that the
        caller displays the text instead of trying to decode partial data.
        """
        mem, alt = '', ''
        # Wait maximum 5s for connection
        socket.setdefaulttimeout(5)
        try:
            req = urllib2.Request(attrs['src'])
            req.add_header('User-Agent', 'Gajim ' + gajim.version)
            f = urllib2.urlopen(req)
        except Exception as ex:
            log.debug('Error loading image %s ' % attrs['src']  + str(ex))
            return (mem, attrs.get('alt', 'Broken image'))

        def failure_text(reason):
            # the alt attribute (if any) followed by the reason on a new line
            text = attrs.get('alt', '')
            if text:
                text += '\n'
            return text + reason

        try:
            # Wait 0.5s between each byte
            try:
                f.fp._sock.fp._sock.settimeout(0.5)
            except Exception:
                pass
            deadline = time.time() + 3
            while True:
                if time.time() > deadline:
                    log.debug('Timeout loading image %s ' % attrs['src'])
                    mem = ''
                    alt = failure_text(_('Timeout loading image'))
                    break
                try:
                    temp = f.read(100)
                except socket.timeout as ex:
                    log.debug('Timeout loading image %s ' % \
                        attrs['src'] + str(ex))
                    mem = ''
                    alt = failure_text(_('Timeout loading image'))
                    break
                if not temp:
                    break
                mem += temp
                # Max image size = 2 MB (to try to prevent DoS)
                if len(mem) > 2*1024*1024:
                    mem = ''
                    alt = failure_text(_('Image is too big'))
                    break
        finally:
            f.close()
        return (mem, alt)

    def _update_img(self, result, attrs, img_mark):
        """
        Callback function called after the function _get_img above

        result is the (mem, alt) tuple returned by _get_img; img_mark marks
        the placeholder that the image replaces.
        """
        mem, alt = result
        self._process_img(attrs, (mem, alt, img_mark))

    def _process_img(self, attrs, loaded=None):
        """
        Process a img tag

        'data:image/' sources are decoded in place. Other sources are
        fetched through gajim.thread_interface while a placeholder icon is
        shown; the download callback calls this method again with
        loaded=(mem, alt, mark) to replace the placeholder. Returns the
        inserted pixbuf, or None when text was inserted or an error occurred.
        """
        mem = ''
        pixbuf = None
        replace_mark = None
        loader = None
        alt = attrs.get('alt', '')

        try:
            if attrs['src'].startswith('data:image/'):
                # The "data" URL scheme http://tools.ietf.org/html/rfc2397
                import base64
                img = attrs['src'].split(',')[1]
                mem = base64.standard_b64decode(urllib2.unquote(img))
            elif loaded is not None:
                (mem, alt, replace_mark) = loaded
            else:
                img_mark = self.textbuf.create_mark(None, self.iter, True)
                gajim.thread_interface(self._get_img, [attrs],
                    self._update_img, [attrs, img_mark])
                if alt:
                    alt += '\n'
                alt += _('Loading')
                pixbuf = get_icon_pixmap('gajim-receipt_missing')
            if mem:
                # Caveat: GdkPixbuf is known not to be safe to load
                # images from network... this program is now potentially
                # hackable ;)
                loader = gtk.gdk.PixbufLoader()
                dims = [0, 0]
                def height_cb(length):
                    dims[1] = length
                def width_cb(length):
                    dims[0] = length
                # process width and height attributes
                w = attrs.get('width')
                h = attrs.get('height')
                # override with width and height styles
                for attr, val in style_iter(attrs.get('style', '')):
                    if attr == 'width':
                        w = val
                    elif attr == 'height':
                        h = val
                if w:
                    self._parse_length(w, False, False, 1, 1000, width_cb)
                if h:
                    self._parse_length(h, False, False, 1, 1000, height_cb)
                def set_size(emitter, w, h, dims):
                    """
                    FIXME: Floats should be relative to the whole textview, and
                    resize with it. This needs new pixbufs for every resize,
                    gtk.gdk.Pixbuf.scale_simple or similar.
                    """
                    # w and h are the natural size of the image; a float
                    # request is a fraction of it, a missing (or zero)
                    # request falls back to it
                    # NOTE(review): _parse_length can also deliver floats for
                    # unit lengths such as 'px' -- confirm they are meant to
                    # be treated as fractions here
                    if isinstance(dims[0], float):
                        dims[0] = int(dims[0]*w)
                    if not dims[0]:
                        dims[0] = w
                    if isinstance(dims[1], float):
                        dims[1] = int(dims[1]*h)
                    if not dims[1]:
                        dims[1] = h
                    loader.set_size(*dims)
                if w or h:
                    loader.connect('size-prepared', set_size, dims)
                loader.write(mem)
                loader.close()
                pixbuf = loader.get_pixbuf()
                alt = attrs.get('alt', '')
            working_iter = self.iter
            if replace_mark is not None:
                # remove the placeholder that was inserted at the mark
                working_iter = self.textbuf.get_iter_at_mark(replace_mark)
                next_iter = working_iter.copy()
                next_iter.forward_char()
                self.textbuf.delete(working_iter, next_iter)
                self.textbuf.delete_mark(replace_mark)
            if pixbuf is not None:
                tags = self._get_style_tags()
                if tags:
                    tmpmark = self.textbuf.create_mark(None, working_iter, True)
                self.textbuf.insert_pixbuf(working_iter, pixbuf)
                self.starting = False
                if tags:
                    start = self.textbuf.get_iter_at_mark(tmpmark)
                    for tag in tags:
                        self.textbuf.apply_tag(tag, start, working_iter)
                    self.textbuf.delete_mark(tmpmark)
            else:
                self._insert_text('[IMG: %s]' % alt, working_iter)
        except Exception as ex:
            log.error('Error loading image ' + str(ex))
            pixbuf = None
            if loader is not None:
                try:
                    loader.close()
                except Exception:
                    pass
        return pixbuf

    def _begin_span(self, style, tag=None, id_=None):
        """
        Open a style span: push a tag carrying the given style on the stack

        With style None the given tag (possibly None) is pushed unchanged.
        Otherwise every declaration of style is applied to tag, which is
        created (named id_ if given, anonymous otherwise) when missing.
        """
        if style is None:
            self.styles.append(tag)
            return None
        if tag is None:
            if id_:
                tag = self.textbuf.create_tag(id_)
            else:
                tag = self.textbuf.create_tag() # we create anonymous tag
        for attr, val in style_iter(style):
            attr = attr.lower()
            try:
                method = self.__style_methods[attr]
            except KeyError:
                log.warning('Style attribute "%s" requested '
                    'but not yet implemented' % attr)
            else:
                method(self, tag, val)
        self.styles.append(tag)

    def _end_span(self):
        """
        Close the innermost style span
        """
        self.styles.pop()

    def _jump_line(self):
        """
        Insert a line break (tagged 'eol') and note that a new line starts
        """
        self.textbuf.insert_with_tags_by_name(self.iter, '\n', 'eol')
        self.starting = True

    def _insert_text(self, text, working_iter=None):
        """
        Insert text at working_iter (default: self.iter) with the open styles

        Empty text is ignored. While at the start of a line, self.starting
        stays set only if the inserted text ends with a newline.
        """
        if not text:
            return
        if working_iter is None:
            working_iter = self.iter
        if self.starting and text != '\n':
            self.starting = text.endswith('\n')
        tags = self._get_style_tags()
        if tags:
            self.textbuf.insert_with_tags(working_iter, text, *tags)
        else:
            self.textbuf.insert(working_iter, text)

    def _starts_line(self):
        """
        Tell whether the insertion point is at the beginning of a line
        """
        return self.starting or self.iter.starts_line()

    def _flush_text(self):
        """
        Write out the character data collected so far

        Outside <pre>, whitespace runs are collapsed and the text goes
        through handle_specials; inside <pre> it is inserted as is, without
        leading and trailing newlines.
        """
        if not self.text:
            return
        text, self.text = self.text, ''
        if self.preserve:
            self._insert_text(text.strip('\n'))
        else:
            text = text.replace('\n', ' ')
            self.handle_specials(whitespace_rx.sub(' ', text))

    def _anchor_event(self, tag, textview, event, iter_, href, type_):
        """
        Tag 'event' handler: emit 'url-clicked' on a button press
        """
        if event.type == gtk.gdk.BUTTON_PRESS:
            self.textview.emit('url-clicked', href, type_)
            return True
        return False

    def handle_specials(self, text):
        """
        Print text through the conversation textview's special-text detection
        and continue from the iter it returns
        """
        self.iter = self.conv_textview.detect_and_print_special_text(text,
            self._get_style_tags())

    def characters(self, content):
        """
        SAX callback: collect character data until the next flush

        Whitespace-only content at the beginning of a line is dropped, except
        inside <pre>.
        """
        if self.preserve:
            self.text += content
            return
        if allwhitespace_rx.match(content) is not None and self._starts_line():
            return
        self.text += content
        self.starting = False

    def startElement(self, name, attrs):
        """
        SAX callback: open the style span of the element and start lines,
        list items, links and images as the element requires
        """
        self._flush_text()
        klass = [i for i in attrs.get('class', ' ').split(' ') if i]
        style = ''
        #Add styles defined for classes
        for k in klass:
            if k in classes:
                style += classes[k]

        tag = None
        #FIXME: if we want to use id, it needs to be unique across
        # the whole textview, so we need to add something like the
        # message-id to it.
        #id_ = attrs.get('id',None)
        id_ = None
        if name == 'a':
            #TODO: accesskey, charset, hreflang, rel, rev, tabindex, type
            href = attrs.get('href', None)
            if not href:
                # Gaim sends HREF instead of href
                href = attrs.get('HREF', None)
            title = attrs.get('title', attrs.get('rel', href))
            type_ = attrs.get('type', None)
            tag = self._create_url(href, title, type_, id_)
        elif name == 'blockquote':
            cite = attrs.get('cite', None)
            if cite:
                tag = self.textbuf.create_tag(id_)
                # the cited source is what the tag has to carry as title
                tag.title = cite
                tag.is_anchor = True
        elif name in LIST_ELEMS:
            style += ';margin-left: 2em'
        elif name == 'img':
            tag = self._process_img(attrs)
        if name in element_styles:
            style += element_styles[name]
        # so that explicit styles override implicit ones,
        # we add the attribute last
        style += ";"+attrs.get('style', '')
        if style == '':
            style = None
        self._begin_span(style, tag, id_)

        if name == 'br':
            pass # handled in endElement
        elif name == 'hr':
            pass # handled in endElement
        elif name in BLOCK:
            if not self._starts_line():
                self._jump_line()
            if name == 'pre':
                self.preserve = True
        elif name == 'span':
            pass
        elif name in ('dl', 'ul'):
            if not self._starts_line():
                self._jump_line()
            self.list_counters.append(None)
        elif name == 'ol':
            if not self._starts_line():
                self._jump_line()
            self.list_counters.append(0)
        elif name == 'li':
            # an <li> outside of any list is rendered like an unordered item
            if not self.list_counters or self.list_counters[-1] is None:
                li_head = u'\u2022'
            else:
                self.list_counters[-1] += 1
                li_head = '%i.' % self.list_counters[-1]
            self.text = ' '*len(self.list_counters)*4 + li_head + ' '
            self._flush_text()
            self.starting = True
        elif name == 'dd':
            self._jump_line()
        elif name == 'dt':
            if not self.starting:
                self._jump_line()
        elif name in ('a', 'img', 'body', 'html'):
            pass
        elif name in INLINE:
            # inline elements only open a style span, they never break
            # the line
            pass
        else:
            log.warning('Unhandled element "%s"' % name)

    def endElement(self, name):
        """
        SAX callback: flush pending text, finish the element's line if
        needed and close its style span
        """
        endPreserving = False
        newLine = False
        if name == 'br':
            newLine = True
        elif name == 'hr':
            #FIXME: plenty of unused attributes (width, height,...) :)
            self._jump_line()
            try:
                self.textbuf.insert_pixbuf(self.iter,
                    self.textview.focus_out_line_pixbuf)
                self._jump_line()
            except Exception as e:
                log.debug('Error in hr' + str(e))
        elif name in LIST_ELEMS:
            self.list_counters.pop()
        elif name == 'li':
            newLine = True
        elif name in ('img', 'body', 'html', 'a'):
            pass
        elif name in INLINE:
            pass
        elif name in ('dd', 'dt', ):
            pass
        elif name in BLOCK:
            if name == 'pre':
                endPreserving = True
        else:
            log.warning("Unhandled element '%s'" % name)
        self._flush_text()
        if endPreserving:
            self.preserve = False
        if newLine:
            self._jump_line()
        self._end_span()
843 844

class HtmlTextView(gtk.TextView):
    """
    Read-only gtk.TextView used to display XHTML content

    display_html() feeds markup to an xml.sax parser whose content handler is
    an HtmlHandler. While the pointer is over a text tag flagged is_anchor the
    view shows a hand cursor and schedules a tooltip with the tag's title.
    Clipboard contents are built by get_selected_text(), which replaces child
    anchors by their 'plaintext' data.
    """

    def __init__(self):
        gobject.GObject.__init__(self)
        self.set_wrap_mode(gtk.WRAP_CHAR)
        self.set_editable(False)
        # True while the hand cursor is displayed over an anchor
        self._changed_cursor = False
        self.connect('destroy', self.__destroy_event)
        self.connect('motion-notify-event', self.__motion_notify_event)
        self.connect('leave-notify-event', self.__leave_event)
        self.connect('enter-notify-event', self.__motion_notify_event)
        self.connect('realize', self.on_html_text_view_realized)
        self.connect('unrealize', self.on_html_text_view_unrealized)
        self.connect('copy-clipboard', self.on_html_text_view_copy_clipboard)
        self.get_buffer().connect_after('mark-set', self.on_text_buffer_mark_set)
        self.get_buffer().create_tag('eol', scale = pango.SCALE_XX_SMALL)
        self.tooltip = tooltips.BaseTooltip()
        self.config = gajim.config
        self.interface = gajim.interface

    def __destroy_event(self, widget):
        """
        Hide the tooltip if one is pending or shown when the view is destroyed
        """
        if self.tooltip.timeout != 0:
            self.tooltip.hide_tooltip()

    def __leave_event(self, widget, event):
        """
        Restore the text cursor when the pointer leaves the view
        """
        if self._changed_cursor:
            window = widget.get_window(gtk.TEXT_WINDOW_TEXT)
            window.set_cursor(gtk.gdk.Cursor(gtk.gdk.XTERM))
            self._changed_cursor = False

    def show_tooltip(self, tag):
        """
        Show the title of tag in the tooltip if the pointer is still over an
        anchor

        Scheduled with gobject.timeout_add() by the motion handler. Nothing is
        returned, so the timeout is not rescheduled.
        """
        if self.tooltip.win:
            # a tooltip window already exists
            return
        # check if the current pointer is still over the line
        x, y, _ = self.window.get_pointer()
        x, y = self.window_to_buffer_coords(gtk.TEXT_WINDOW_TEXT, x, y)
        tags = self.get_iter_at_location(x, y).get_tags()
        if not any(getattr(tag_, 'is_anchor', False) for tag_ in tags):
            return
        text = getattr(tag, 'title', False)
        if text:
            pointer = self.get_pointer()
            position = self.window.get_origin()
            self.tooltip.show_tooltip(text, 8, position[1] + pointer[1])

    def __motion_notify_event(self, widget, event):
        """
        Track the anchor under the pointer: update the cursor and the tooltip

        Connected to both 'motion-notify-event' and 'enter-notify-event'.
        Always returns False.
        """
        x, y, _ = widget.window.get_pointer()
        x, y = widget.window_to_buffer_coords(gtk.TEXT_WINDOW_TEXT, x, y)
        tags = widget.get_iter_at_location(x, y).get_tags()
        anchor_tags = [tag for tag in tags if getattr(tag, 'is_anchor', False)]
        if self.tooltip.timeout != 0:
            # Check if we should hide the line tooltip
            if not anchor_tags:
                self.tooltip.hide_tooltip()
        if not self._changed_cursor and anchor_tags:
            # just moved onto an anchor: hand cursor, tooltip after 500 ms
            window = widget.get_window(gtk.TEXT_WINDOW_TEXT)
            window.set_cursor(gtk.gdk.Cursor(gtk.gdk.HAND2))
            self._changed_cursor = True
            self.tooltip.timeout = gobject.timeout_add(500, self.show_tooltip,
                anchor_tags[0])
        elif self._changed_cursor and not anchor_tags:
            # just moved off an anchor: back to the text cursor
            window = widget.get_window(gtk.TEXT_WINDOW_TEXT)
            window.set_cursor(gtk.gdk.Cursor(gtk.gdk.XTERM))
            self._changed_cursor = False
        return False

    def display_html(self, html, conv_textview):
        """
        Parse the html string and render it at the end of the buffer

        The markup is parsed by xml.sax with an HtmlHandler built from
        conv_textview and the buffer's end iterator.
        """
        buffer_ = self.get_buffer()
        eob = buffer_.get_end_iter()
        ## this works too if libxml2 is not available
        # parser = xml.sax.make_parser(['drv_libxml2'])
        # parser.setFeature(xml.sax.handler.feature_validation, True)
        parser = xml.sax.make_parser()
        parser.setContentHandler(HtmlHandler(conv_textview, eob))
        parser.parse(StringIO(html))

        # too much space after :)
        #if not eob.starts_line():
        #    buffer_.insert(eob, '\n')

    def on_html_text_view_copy_clipboard(self, unused_data):
        """
        Put get_selected_text() on the CLIPBOARD selection and stop the
        'copy-clipboard' emission
        """
        clipboard = self.get_clipboard(gtk.gdk.SELECTION_CLIPBOARD)
        clipboard.set_text(self.get_selected_text())
        self.emit_stop_by_name('copy-clipboard')

    def on_html_text_view_realized(self, unused_data):
        """
        Detach the PRIMARY selection clipboard from the buffer on realize
        """
        # presumably so that on_text_buffer_mark_set() alone decides what
        # text ends up in PRIMARY -- TODO confirm
        self.get_buffer().remove_selection_clipboard(self.get_clipboard(
            gtk.gdk.SELECTION_PRIMARY))

    def on_html_text_view_unrealized(self, unused_data):
        """
        Attach the PRIMARY selection clipboard to the buffer again on unrealize
        """
        self.get_buffer().add_selection_clipboard(self.get_clipboard(
            gtk.gdk.SELECTION_PRIMARY))

    def on_text_buffer_mark_set(self, location, mark, unused_data):
        """
        Copy the current selection, if any, to the PRIMARY clipboard

        Connected with connect_after() to the buffer's 'mark-set' signal; none
        of the signal arguments is used.
        """
        # NOTE(review): the signal passes the emitting buffer first, so the
        # parameter names look shifted by one position -- confirm
        bounds = self.get_buffer().get_selection_bounds()
        if bounds:
            # textview can be hidden while we add a new line in it.
            if self.has_screen():
                clipboard = self.get_clipboard(gtk.gdk.SELECTION_PRIMARY)
                clipboard.set_text(self.get_selected_text())

    def get_selected_text(self):
        """
        Return the selected text, '' when nothing is selected

        An object replacement character (U+FFFC) that carries a child anchor
        is replaced by the anchor's 'plaintext' data, or dropped when that
        data is empty. Every other character is copied unchanged.
        """
        bounds = self.get_buffer().get_selection_bounds()
        if not bounds:
            return ''
        (search_iter, end) = bounds
        pieces = []
        while search_iter.compare(end):
            character = search_iter.get_char()
            piece = character
            if character == u'\ufffc':
                anchor = search_iter.get_child_anchor()
                if anchor:
                    piece = anchor.get_data('plaintext') or ''
            pieces.append(piece)
            search_iter.forward_char()
        return ''.join(pieces)

# Tag that made the demo below switch to the hand cursor, None otherwise
change_cursor = None

if __name__ == '__main__':
    # Manual test: render a series of XHTML samples in a ConversationTextview
    # placed inside a scrolled window and run the GTK main loop.
    import os

    from conversation_textview import ConversationTextview
    import gajim as gaj

    log = logging.getLogger()
    gaj.Interface()

    htmlview = ConversationTextview(None)

    path = gtkgui_helpers.get_icon_path('gajim-muc_separator')
    # use this for hr
    htmlview.tv.focus_out_line_pixbuf =  gtk.gdk.pixbuf_new_from_file(path)

    tooltip = tooltips.BaseTooltip()

    def on_textview_motion_notify_event(widget, event):
        """
        Change the cursor to a hand when we are over a mail or an url
        """
        global change_cursor
        pointer_x, pointer_y = htmlview.tv.window.get_pointer()[0:2]
        x, y = htmlview.tv.window_to_buffer_coords(gtk.TEXT_WINDOW_TEXT,
            pointer_x, pointer_y)
        tags = htmlview.tv.get_iter_at_location(x, y).get_tags()
        if change_cursor:
            htmlview.tv.get_window(gtk.TEXT_WINDOW_TEXT).set_cursor(
                gtk.gdk.Cursor(gtk.gdk.XTERM))
            change_cursor = None
        for tag in tags:
            # not every tag carries is_anchor, hence getattr with a default
            if getattr(tag, 'is_anchor', False):
                htmlview.tv.get_window(gtk.TEXT_WINDOW_TEXT).set_cursor(
                    gtk.gdk.Cursor(gtk.gdk.HAND2))
                change_cursor = tag

    htmlview.tv.connect('motion_notify_event', on_textview_motion_notify_event)

    # placeholder; not referenced anywhere below
    def handler(texttag, widget, event, iter_, kind):
        if event.type == gtk.gdk.BUTTON_PRESS:
            pass

    htmlview.tv.hyperlink_handler = htmlview.hyperlink_handler

    htmlview.print_real_text(None, xhtml='<div>'
    '<span style="color: red; text-decoration:underline">Hello</span><br/>\n'
      '  <img src="http://images.slashdot.org/topics/topicsoftware.gif"/><br/>\n'
    '<span style="font-size: 500%; font-family: serif">World</span>\n'
      '</div>\n')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
     <p xmlns='http://www.w3.org/1999/xhtml'>a:b
       <a href='http://google.com/' xmlns='http://www.w3.org/1999/xhtml'>Google
       </a>
     </p><br/>
    </body>''')
    htmlview.print_real_text(None, xhtml='''
     <body xmlns='http://www.w3.org/1999/xhtml'>
      <p style='font-size:large'>
            <span style='font-style: italic'>O
            <span style='font-size:larger'>M</span>G</span>,
            I&apos;m <span style='color:green'>green</span>
            with <span style='font-weight: bold'>envy</span>!
      </p>
     </body>
            ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
            http://test.com/  testing links autolinkifying
    </body>
            ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
      <p>As Emerson said in his essay <span style='
        font-style: italic; background-color:cyan'>Self-Reliance</span>:</p>
      <p style='margin-left: 5px; margin-right: 2%'>
            &quot;A foolish consistency is the hobgoblin of little minds.&quot;
      </p>
    </body>
            ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
      <p style='text-align:center'>
        Hey, are you licensed to <a href='http://www.jabber.org/'>Jabber</a>?
      </p>
      <p style='text-align:right'>
        <img src='http://www.xmpp.org/images/psa-license.jpg'
        alt='A License to Jabber' width='50%' height='50%'/>
      </p>
    </body>
            ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
      <ul style='background-color:rgb(120,140,100)'>
       <li> One </li>
       <li> Two </li>
       <li> Three </li>
      </ul><hr /><pre style="background-color:rgb(120,120,120)">def fac(n):
def faciter(n,acc):
    if n==0: return acc
    return faciter(n-1, acc*n)
if n&lt;0: raise ValueError('Must be non-negative')
return faciter(n,1)</pre>
    </body>
            ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
     <ol style='background-color:rgb(120,140,100)'>
       <li> One </li>
       <li> Two is nested: <ul style='background-color:rgb(200,200,100)'>
                     <li> One </li>
                     <li style='font-size:50%'> Two </li>
                     <li style='font-size:200%'> Three </li>
                     <li style='font-size:9999pt'> Four </li>
                    </ul></li>
       <li> Three </li></ol>
    </body>
            ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
    <p>
      <strong>
        <a href='xmpp:example@example.org'>xmpp link</a>
      </strong>: </p>
    <div xmlns='http://www.w3.org/1999/xhtml'>
      <cite style='margin: 7px;' title='xmpp:examples@example.org'>
        <p>
          <strong>examples@example.org wrote:</strong>
        </p>
        <p>this cite - bla bla bla, smile- :-)  ...</p>
      </cite>
      <div>
        <p>some text</p>
      </div>
    </div>
    <p/>
    <p>#232/1</p>
    </body>
    ''')
    htmlview.print_real_text(None, xhtml='<hr />')
    # NOTE(review): the src value below carries no URI scheme prefix; it looks
    # like the beginning of a data: URI is missing -- confirm against upstream
    htmlview.print_real_text(None, xhtml='''
    <body xmlns='http://www.w3.org/1999/xhtml'>
    <br/>
<img src='\
AAAC8IyPqcvt3wCcDkiLc7C0qwyGHhSWpjQu5yqmCYsapyuvUUlvONmOZtfzgFz\
ByTB10QgxOR0TqBQejhRNzOfkVJ+5YiUqrXF5Y5lKh/DeuNcP5yLWGsEbtLiOSp\
a/TPg7JpJHxyendzWTBfX0cxOnKPjgBzi4diinWGdkF8kjdfnycQZXZeYGejmJl\
ZeGl9i2icVqaNVailT6F5iJ90m6mvuTS4OK05M0vDk0Q4XUtwvKOzrcd3iq9uis\
F81M1OIcR7lEewwcLp7tuNNkM3uNna3F2JQFo97Vriy/Xl4/f1cf5VWzXyym7PH\
hhx4dbgYKAAA7' alt='Larry'/>
    </body>
    ''')
    htmlview.tv.show()
    sw = gtk.ScrolledWindow()
    sw.set_property('hscrollbar-policy', gtk.POLICY_AUTOMATIC)
    sw.set_property('vscrollbar-policy', gtk.POLICY_AUTOMATIC)
    sw.set_property('border-width', 0)
    sw.add(htmlview.tv)
    sw.show()
    frame = gtk.Frame()
    frame.set_shadow_type(gtk.SHADOW_IN)
    frame.show()
    frame.add(sw)
    w = gtk.Window()
    w.add(frame)
    w.set_default_size(400, 300)
    w.show_all()
    w.connect('destroy', lambda w: gtk.main_quit())
    gtk.main()