Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
pg mr
gajim
Commits
1ac4fd7a
Verified
Commit
1ac4fd7a
authored
Feb 13, 2021
by
André
Browse files
Move regular expressions to separate file
parent
c46ee9a2
Changes
5
Hide whitespace changes
Inline
Side-by-side
gajim/common/helpers.py
View file @
1ac4fd7a
...
...
@@ -80,6 +80,8 @@
from
gajim.common.const
import
URIAction
from
gajim.common.const
import
GIO_TLS_ERRORS
from
gajim.common.const
import
SHOW_LIST
from
gajim.common.regex
import
INVALID_XML_CHARS_REGEX
from
gajim.common.regex
import
STH_AT_STH_DOT_STH_REGEX
from
gajim.common.structs
import
URI
...
...
@@ -632,7 +634,7 @@ def get_auth_sha(sid, initiator, target):
def remove_invalid_xml_chars(string_):
    """Strip every character matched by INVALID_XML_CHARS_REGEX.

    :param string_: text to clean; may be ``None`` or empty
    :return: the cleaned text, or ``string_`` itself when it is falsy
    """
    if string_:
        # A single pass with the module-level compiled pattern is enough;
        # running the same substitution a second time through
        # app.interface would find nothing left to remove.
        string_ = INVALID_XML_CHARS_REGEX.sub('', string_)
    return string_
def
get_random_string
(
count
=
16
):
...
...
@@ -1067,7 +1069,7 @@ def parse_uri(uri):
uri
=
uri
[
4
:]
return
URI
(
type
=
URIType
.
TEL
,
data
=
uri
)
if
app
.
interface
.
sth_at_sth_dot_sth_re
.
match
(
uri
):
if
STH_AT_STH_DOT_STH_REGEX
.
match
(
uri
):
return
URI
(
type
=
URIType
.
AT
,
data
=
uri
)
if
uri
.
startswith
(
'geo:'
):
...
...
gajim/common/regex.py
0 → 100644
View file @
1ac4fd7a
import
re
def _get_link_pattern():
    """Return the (uncompiled) regex source used to detect links.

    The pattern is an alternation of six branches, tried in this order:
    scheme URIs enclosed in parentheses, bare scheme URIs, ``mailto:``
    links, ``a@b.c`` like addresses, and ``www.``/``ftp.`` prefixed
    hosts (first the parenthesised form, then the bare one).
    """
    # Notation used in the fragments below:
    #   \S  any non-whitespace character, \s any whitespace character
    #   \w  any word character, \W any non-word character
    #   \b  zero-width word boundary
    #   (?<=\() ... (?=\))  the match must sit directly between '(' and ')'
    #   \S*[^\s\W]  the match has to end on a word character, so a
    #               trailing '?' or ')' is not swallowed

    # One or more URL characters: a literal from the allowed set or a
    # percent-encoded byte.
    url_chars = (r"([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]"
                 r"|%[A-Fa-f0-9]{2})+")

    # NOTE: it's ok to catch www.gr, such stuff exists!
    legacy_in_parens = r"((?<=\()(www|ftp)\." + url_chars + r"(?=\)))"
    legacy_bare = r"((www|ftp)\." + url_chars + r"\." + url_chars + r")"

    # FIXME: recognize xmpp: and treat it specially
    uri_in_parens = (
        r"((?<=\()[A-Za-z][A-Za-z0-9\+\.\-]*:"
        r"([\w\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+"
        r"(?=\)))")
    uri_bare = r"(\w[\w\+\.\-]*:([^<>\s]|%[A-Fa-f0-9]{2})+)"

    mailto = r'\bmailto:\S*[^\s\W]'
    # at_least_one_char@at_least_one_char.at_least_one_char
    address = r'\b\S+@\S+\.\S*[^\s\W]'

    return '|'.join((uri_in_parens, uri_bare, mailto, address,
                     legacy_in_parens, legacy_bare))
def _get_basic_pattern():
    """Return the link pattern followed by the ASCII formatting branches.

    Three more alternatives are appended to the result of
    ``_get_link_pattern()``: text wrapped in ``*``, in ``~`` and in ``_``.
    The string is returned uncompiled.
    """
    # detects eg. *b* *bold* *bold bold* test *bold* *bold*! (*bold*)
    # doesn't detect (it's a feature :P) * bold* *bold * * bold * test*bold*
    star_span = r'(?<!\w)' r'\*[^\s*]' r'([^*]*[^\s*])?' r'\*(?!\w)'
    # the ~ variant must be delimited by whitespace or the string edges
    tilde_span = r'(?<!\S)' r'~[^\s~]' r'([^~]*[^\s~])?' r'~(?!\S)'
    underscore_span = r'(?<!\w)' r'_[^\s_]' r'([^_]*[^\s_])?' r'_(?!\w)'

    return '|'.join((_get_link_pattern(), star_span, tilde_span,
                     underscore_span))
def _get_emot_and_basic_pattern(use_ascii_formatting=True):
    """Return the basic or link pattern with the emoticon regex appended.

    :param use_ascii_formatting: when true the base is
        ``_get_basic_pattern()``, otherwise ``_get_link_pattern()``
    :return: uncompiled pattern ``<base>|<emoticons>``
    """
    # Imported here rather than at module level, as in the original code.
    from gajim.gui.emoji_data import emoji_data

    emoticon_source = emoji_data.get_regex()
    base_source = (_get_basic_pattern() if use_ascii_formatting
                   else _get_link_pattern())
    # because emoticons match later (in the string) they need to be after
    # basic matches that may occur earlier
    return '%s|%s' % (base_source, emoticon_source)
# link pattern only
LINK_REGEX = re.compile(
    _get_link_pattern(),
    flags=re.IGNORECASE | re.UNICODE)

# link pattern + ASCII formatting
BASIC_REGEX = re.compile(
    _get_basic_pattern(),
    flags=re.IGNORECASE)

# emoticons + link pattern
EMOT_AND_LINK_REGEX = re.compile(
    _get_emot_and_basic_pattern(use_ascii_formatting=False),
    flags=re.IGNORECASE)

# emoticons + link pattern + ASCII formatting
EMOT_AND_BASIC_REGEX = re.compile(
    _get_emot_and_basic_pattern(use_ascii_formatting=True),
    flags=re.IGNORECASE)
# Matches one character at a time: the control characters below \x20
# except \x09, \x0a and \x0d, the surrogate range and \ufffe/\uffff.
INVALID_XML_CHARS_REGEX = re.compile(
    '[\x00-\x08]|'
    '[\x0b-\x0c]|'
    '[\x0e-\x1f]|'
    '[\ud800-\udfff]|'
    '[\ufffe-\uffff]')

# at least one character in 3 parts (before @, after @, after .)
STH_AT_STH_DOT_STH_REGEX = re.compile(
    r'^[a-zA-Z0-9_.+-]+'
    r'@[a-zA-Z0-9-]+'
    r'\.[a-zA-Z0-9-.]+$')
gajim/conversation_textview.py
View file @
1ac4fd7a
...
...
@@ -44,6 +44,11 @@
from
gajim.common.const
import
Trust
from
gajim.common.const
import
URI_SCHEMES
from
gajim.common.helpers
import
to_user_string
from
gajim.common.regex
import
STH_AT_STH_DOT_STH_REGEX
from
gajim.common.regex
import
BASIC_REGEX
from
gajim.common.regex
import
LINK_REGEX
from
gajim.common.regex
import
EMOT_AND_BASIC_REGEX
from
gajim.common.regex
import
EMOT_AND_LINK_REGEX
from
gajim.gui
import
util
from
gajim.gui.util
import
get_cursor
...
...
@@ -584,9 +589,17 @@ def detect_and_print_special_text(self, otext, other_tags, graphics=True,
# basic: links + mail + formatting is always checked (we like that)
if
app
.
settings
.
get
(
'emoticons_theme'
)
and
graphics
:
# search for emoticons & urls
iterator
=
app
.
interface
.
emot_and_basic_re
.
finditer
(
otext
)
else
:
# search for just urls + mail + formatting
iterator
=
app
.
interface
.
basic_pattern_re
.
finditer
(
otext
)
if
app
.
settings
.
get
(
'ascii_formatting'
):
regex
=
EMOT_AND_BASIC_REGEX
else
:
regex
=
EMOT_AND_LINK_REGEX
else
:
if
app
.
settings
.
get
(
'ascii_formatting'
):
# search for just urls + mail + formatting
regex
=
BASIC_REGEX
else
:
# search for just urls + mail
regex
=
LINK_REGEX
iterator
=
regex
.
finditer
(
otext
)
if
iter_
:
end_iter
=
iter_
else
:
...
...
@@ -693,7 +706,7 @@ def print_special_text(self, special_text, other_tags, graphics=True,
tags
.
append
(
'mail'
)
elif
special_text
.
startswith
(
'xmpp:'
)
and
not
is_xhtml_link
:
tags
.
append
(
'xmpp'
)
elif
app
.
interface
.
sth_at_sth_dot_sth_re
.
match
(
special_text
)
and
\
elif
STH_AT_STH_DOT_STH_REGEX
.
match
(
special_text
)
and
\
not
is_xhtml_link
:
# it's a JID or mail
tags
.
append
(
'sth_at_sth'
)
...
...
gajim/gtk/message_input.py
View file @
1ac4fd7a
...
...
@@ -28,6 +28,7 @@
from
gajim.common
import
app
from
gajim.common.i18n
import
_
from
gajim.common.const
import
StyleAttr
from
gajim.common.regex
import
LINK_REGEX
from
.util
import
scroll_to_end
...
...
@@ -207,7 +208,7 @@ def make_clickable_urls(self, text):
index
=
0
new_text
=
''
iterator
=
app
.
interface
.
link_pattern_re
.
finditer
(
text
)
iterator
=
LINK_REGEX
.
finditer
(
text
)
for
match
in
iterator
:
start
,
end
=
match
.
span
()
url
=
text
[
start
:
end
]
...
...
gajim/gui_interface.py
View file @
1ac4fd7a
...
...
@@ -33,7 +33,6 @@
import
os
import
sys
import
re
import
time
import
json
import
logging
...
...
@@ -101,7 +100,6 @@
from
gajim.gui.dialogs
import
InputDialog
from
gajim.gui.dialogs
import
PassphraseDialog
from
gajim.gui.filechoosers
import
FileChooserDialog
from
gajim.gui.emoji_data
import
emoji_data
from
gajim.gui.filetransfer
import
FileTransfersWindow
from
gajim.gui.filetransfer_progress
import
FileTransferProgress
from
gajim.gui.roster_item_exchange
import
RosterItemExchangeWindow
...
...
@@ -1296,112 +1294,6 @@ def handle_event(self, account, fjid, type_):
if
isinstance
(
ctrl
,
ChatControlBase
):
ctrl
.
scroll_to_end
()
################################################################################
### Methods dealing with emoticons
################################################################################
@property
def basic_pattern_re(self):
    """Compiled, case-insensitive form of ``self.basic_pattern``.

    Compiled on first access and cached in ``self._basic_pattern_re``.
    """
    compiled = self._basic_pattern_re
    if compiled:
        return compiled
    self._basic_pattern_re = re.compile(self.basic_pattern, re.IGNORECASE)
    return self._basic_pattern_re
@property
def emot_and_basic_re(self):
    """Compiled, case-insensitive form of ``self.emot_and_basic``.

    Compiled on first access and cached in ``self._emot_and_basic_re``.
    """
    compiled = self._emot_and_basic_re
    if compiled:
        return compiled
    self._emot_and_basic_re = re.compile(self.emot_and_basic, re.IGNORECASE)
    return self._emot_and_basic_re
@property
def sth_at_sth_dot_sth_re(self):
    """Compiled form of ``self.sth_at_sth_dot_sth``.

    Compiled on first access and cached in ``self._sth_at_sth_dot_sth_re``.
    """
    compiled = self._sth_at_sth_dot_sth_re
    if compiled:
        return compiled
    self._sth_at_sth_dot_sth_re = re.compile(self.sth_at_sth_dot_sth)
    return self._sth_at_sth_dot_sth_re
@property
def invalid_XML_chars_re(self):
    """Compiled form of ``self.invalid_XML_chars``.

    Compiled on first access and cached in ``self._invalid_XML_chars_re``.
    """
    compiled = self._invalid_XML_chars_re
    if compiled:
        return compiled
    self._invalid_XML_chars_re = re.compile(self.invalid_XML_chars)
    return self._invalid_XML_chars_re
def make_regexps(self):
    """Rebuild the regex sources stored on ``self``.

    Sets ``link_pattern_re`` (compiled), ``basic_pattern``,
    ``emot_and_basic``, ``sth_at_sth_dot_sth`` and ``invalid_XML_chars``
    (all pattern strings) and resets the four cached compiled regexes to
    ``None``. ASCII formatting branches are only added when the
    ``ascii_formatting`` setting is enabled.
    """
    # Notation used in the fragments below:
    #   \S  any non-whitespace character, \s any whitespace character
    #   \w  any word character, \W any non-word character
    #   \b  zero-width word boundary
    #   (?<!\S) / (?!\S)  lookbehind / lookahead for whitespace or the
    #                     string edge, so a lone '*foo*' is detected too
    #   \S*[^\s\W]  the match has to end on a word character, so a
    #               trailing '?' or ')' is not swallowed

    # Forget the previously compiled regexes.
    self._basic_pattern_re = self._emot_and_basic_re = \
        self._sth_at_sth_dot_sth_re = self._invalid_XML_chars_re = None

    # NOTE: it's ok to catch www.gr, such stuff exists!
    legacy_prefixes = (
        r"((?<=\()(www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$"
        r"&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+(?=\)))"
        r"|((www|ftp)\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]"
        r"|%[A-Fa-f0-9]{2})+"
        r"\.([A-Za-z0-9\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+)")

    # FIXME: recognize xmpp: and treat it specially
    links = (
        r"((?<=\()[A-Za-z][A-Za-z0-9\+\.\-]*:"
        r"([\w\.\-_~:/\?#\[\]@!\$&'\(\)\*\+,;=]|%[A-Fa-f0-9]{2})+"
        r"(?=\)))|(\w[\w\+\.\-]*:([^<>\s]|%[A-Fa-f0-9]{2})+)")

    # 2nd one: at_least_one_char@at_least_one_char.at_least_one_char
    mail = (r'\bmailto:\S*[^\s\W]|'
            r'\b\S+@\S+\.\S*[^\s\W]')

    # detects eg. *b* *bold* *bold bold* test *bold* *bold*! (*bold*)
    # doesn't detect (it's a feature :P) * bold* *bold * * bold * test*bold*
    formatting = (
        r'|(?<!\w)' r'\*[^\s*]' r'([^*]*[^\s*])?' r'\*(?!\w)|'
        r'(?<!\S)' r'~[^\s~]' r'([^~]*[^\s~])?' r'~(?!\S)|'
        r'(?<!\w)' r'_[^\s_]' r'([^_]*[^\s_])?' r'_(?!\w)')

    link_source = '|'.join((links, mail, legacy_prefixes))
    self.link_pattern_re = re.compile(link_source, re.I | re.U)

    basic_source = link_source
    if app.settings.get('ascii_formatting'):
        basic_source = link_source + formatting
    self.basic_pattern = basic_source

    # because emoticons match later (in the string) they need to be after
    # basic matches that may occur earlier
    emoticon_source = emoji_data.get_regex()
    self.emot_and_basic = '%s|%s' % (basic_source, emoticon_source)

    # at least one character in 3 parts (before @, after @, after .)
    self.sth_at_sth_dot_sth = \
        r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$'

    # Invalid XML chars
    self.invalid_XML_chars = (
        '[\x00-\x08]|[\x0b-\x0c]|[\x0e-\x1f]|'
        '[\ud800-\udfff]|[\ufffe-\uffff]')
################################################################################
### Methods for opening new messages controls
################################################################################
...
...
@@ -2114,15 +2006,6 @@ def __init__(self):
self
.
handlers
=
{}
self
.
roster
=
None
self
.
_invalid_XML_chars_re
=
None
self
.
_basic_pattern_re
=
None
self
.
_emot_and_basic_re
=
None
self
.
_sth_at_sth_dot_sth_re
=
None
self
.
link_pattern_re
=
None
self
.
invalid_XML_chars
=
None
self
.
basic_pattern
=
None
self
.
emot_and_basic
=
None
self
.
sth_at_sth_dot_sth
=
None
self
.
avatar_storage
=
AvatarStorage
()
...
...
@@ -2199,8 +2082,6 @@ def __init__(self):
from
gajim.gui.emoji_chooser
import
emoji_chooser
emoji_chooser
.
load
()
self
.
make_regexps
()
self
.
last_ftwindow_update
=
0
self
.
_network_monitor
=
Gio
.
NetworkMonitor
.
get_default
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment