Commit 4c09587d authored by Philipp Hörist's avatar Philipp Hörist
Browse files

feat: Add stringprep implementation

parent 7730254b
# This file is part of nbxmpp.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; If not, see <http://www.gnu.org/licenses/>.
# Initial version taken from https://github.com/horazont/aioxmpp
# Modified on 30-AUG-2022
from __future__ import annotations
from typing import Optional
from typing import Callable
import stringprep
from unicodedata import ucd_3_2_0
_nodeprep_prohibited = frozenset('"&\'/:<>@')
def is_RandALCat(c: str) -> bool:
    '''
    Return :data:`True` if the Unicode 3.2.0 bidirectional class of `c` is
    ``R`` or ``AL``.
    '''
    bidi_class = ucd_3_2_0.bidirectional(c)
    return bidi_class == 'R' or bidi_class == 'AL'
def is_LCat(c: str) -> bool:
    '''
    Return :data:`True` if the Unicode 3.2.0 bidirectional class of `c` is
    ``L``.
    '''
    bidi_class = ucd_3_2_0.bidirectional(c)
    return bidi_class == 'L'
def check_nodeprep_prohibited(char: str) -> bool:
    '''
    Characteristic function of the module-level `_nodeprep_prohibited` set:
    return :data:`True` if `char` is a member of it.
    '''
    is_prohibited = char in _nodeprep_prohibited
    return is_prohibited
def _check_against_tables(chars: list[str],
tables: tuple[Callable[[str], bool]]
) -> Optional[str]:
'''
Perform a check against the table predicates in `tables`. `tables` must be
a reusable iterable containing characteristic functions of character sets,
that is, functions which return :data:`True` if the character is in the
table.
The function returns the first character occuring in any of the tables or
:data:`None` if no character matches.
'''
for c in chars:
if any(in_table(c) for in_table in tables):
return c
return None
def do_normalization(chars: list[str]) -> None:
    '''
    Apply NFKC normalization (Unicode 3.2.0 database) to the characters in
    `chars`. The list is modified in-place; nothing is returned.
    '''
    joined = ''.join(chars)
    normalized = ucd_3_2_0.normalize('NFKC', joined)
    # Slice assignment keeps the caller's list object and swaps its content.
    chars[:] = list(normalized)
def check_bidi(chars: list[str]) -> None:
    '''
    Validate the bidirectional-text constraints of stringprep for the
    unicode characters in `chars`.

    Raises :class:`ValueError` if `chars` mixes L characters with R/AL
    characters, or if it contains R/AL characters but does not both start
    and end with one.
    '''
    # Nothing to check: an empty string cannot violate the constraints.
    if not chars:
        return

    # Strings without any R/AL character are not restricted.
    if not any(map(is_RandALCat, chars)):
        return

    if any(map(is_LCat, chars)):
        raise ValueError('L and R/AL characters must not occur in the same'
                         ' string')

    if not (is_RandALCat(chars[0]) and is_RandALCat(chars[-1])):
        raise ValueError('R/AL string must start and end with R/AL character.')
def check_against_tables(chars: list[str],
                         bad_tables: tuple[Callable[[str], bool], ...]
                         ) -> None:
    '''
    Ensure that none of the characters in `chars` is contained in any of
    the `bad_tables`.

    Raises :class:`ValueError` naming the code point of the first offending
    character; returns nothing if there is none.
    '''
    offending = _check_against_tables(chars, bad_tables)
    if offending is None:
        return

    codepoint = ord(offending)
    raise ValueError('Input contains prohibited or unassigned codepoint: '
                     'U+{:04x}'.format(codepoint))
def _nodeprep_do_mapping(chars: list[str]) -> None:
i = 0
while i < len(chars):
c = chars[i]
if stringprep.in_table_b1(c):
del chars[i]
else:
replacement = stringprep.map_table_b2(c)
if replacement != c:
chars[i:(i + 1)] = list(replacement)
i += len(replacement)
def nodeprep(string: str, allow_unassigned: bool = False) -> str:
    '''
    Apply the Nodeprep (`RFC 6122`_) profile to `string` and return the
    prepared string. A :class:`ValueError` is raised in the error cases
    defined in `RFC 3454`_ (stringprep).

    Unless `allow_unassigned` is true, code points from stringprep table
    A.1 are rejected as well.
    '''
    prohibited_tables = (
        stringprep.in_table_c11,
        stringprep.in_table_c12,
        stringprep.in_table_c21,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
        check_nodeprep_prohibited,
    )
    unassigned_tables = (stringprep.in_table_a1,)

    codepoints = list(string)

    # Order matters: map, normalize, then run the checks on the result.
    _nodeprep_do_mapping(codepoints)
    do_normalization(codepoints)
    check_against_tables(codepoints, prohibited_tables)
    check_bidi(codepoints)

    if not allow_unassigned:
        check_against_tables(codepoints, unassigned_tables)

    return ''.join(codepoints)
def _resourceprep_do_mapping(chars: list[str]) -> None:
i = 0
while i < len(chars):
c = chars[i]
if stringprep.in_table_b1(c):
del chars[i]
continue
i += 1
def resourceprep(string: str, allow_unassigned: bool = False) -> str:
    '''
    Apply the Resourceprep (`RFC 6122`_) profile to `string` and return the
    prepared string. A :class:`ValueError` is raised in the error cases
    defined in `RFC 3454`_ (stringprep).

    Unless `allow_unassigned` is true, code points from stringprep table
    A.1 are rejected as well.
    '''
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c21,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    unassigned_tables = (stringprep.in_table_a1,)

    codepoints = list(string)

    # Order matters: map, normalize, then run the checks on the result.
    _resourceprep_do_mapping(codepoints)
    do_normalization(codepoints)
    check_against_tables(codepoints, prohibited_tables)
    check_bidi(codepoints)

    if not allow_unassigned:
        check_against_tables(codepoints, unassigned_tables)

    return ''.join(codepoints)
def nameprep(string: str, allow_unassigned: bool = False) -> str:
    '''
    Apply the Nameprep (`RFC 3491`_) profile to `string` and return the
    prepared string. A :class:`ValueError` is raised in the error cases
    defined in `RFC 3454`_ (stringprep).

    Unless `allow_unassigned` is true, code points from stringprep table
    A.1 are rejected as well.
    '''
    prohibited_tables = (
        stringprep.in_table_c12,
        stringprep.in_table_c22,
        stringprep.in_table_c3,
        stringprep.in_table_c4,
        stringprep.in_table_c5,
        stringprep.in_table_c6,
        stringprep.in_table_c7,
        stringprep.in_table_c8,
        stringprep.in_table_c9,
    )
    unassigned_tables = (stringprep.in_table_a1,)

    codepoints = list(string)

    # The mapping step is the same one Nodeprep uses (tables B.1 and B.2).
    _nodeprep_do_mapping(codepoints)
    do_normalization(codepoints)
    check_against_tables(codepoints, prohibited_tables)
    check_bidi(codepoints)

    if not allow_unassigned:
        check_against_tables(codepoints, unassigned_tables)

    return ''.join(codepoints)
import os
import unittest
from nbxmpp.protocol import LocalpartByteLimit
......@@ -8,6 +9,7 @@ from nbxmpp.protocol import DomainpartByteLimit
from nbxmpp.protocol import DomainpartNotAllowedChar
from nbxmpp.protocol import JID
class JIDParsing(unittest.TestCase):
def test_valid_jids(self):
......@@ -36,11 +38,9 @@ class JIDParsing(unittest.TestCase):
tests = [
('"juliet"@example.com', LocalpartNotAllowedChar),
('foo bar@example.com', LocalpartNotAllowedChar),
('henry\U00002163@example.com', LocalpartNotAllowedChar),
('@example.com', LocalpartByteLimit),
('user@example.com/', ResourcepartByteLimit),
('user@example.com/\U00000001', ResourcepartNotAllowedChar),
('\U0000265A@example.com', LocalpartNotAllowedChar),
('user@host@example.com', DomainpartNotAllowedChar),
('juliet@', DomainpartByteLimit),
('/foobar', DomainpartByteLimit),
......@@ -50,6 +50,19 @@ class JIDParsing(unittest.TestCase):
with self.assertRaises(exception):
JID.from_string(jid)
def test_invalid_precis_jids(self):
    '''
    JIDs in `tests` must be rejected with the given exception while the
    NBXMPP_USE_PRECIS environment variable is set to 'true'.
    '''
    tests = [
        ('henry\U00002163@example.com', LocalpartNotAllowedChar),
        ('\U0000265A@example.com', LocalpartNotAllowedChar),
    ]

    # Remember the caller's value so it can be restored afterwards.
    previous = os.environ.get('NBXMPP_USE_PRECIS')
    os.environ['NBXMPP_USE_PRECIS'] = 'true'
    try:
        for jid, exception in tests:
            with self.assertRaises(exception):
                JID.from_string(jid)
    finally:
        # Always undo the override, even if an assertion above failed, so
        # the setting does not leak into tests that run later.
        if previous is None:
            del os.environ['NBXMPP_USE_PRECIS']
        else:
            os.environ['NBXMPP_USE_PRECIS'] = previous
def test_ip_literals(self):
tests = [
('juliet@[2002:4559:1FE2::4559:1FE2]/res'),
......
import unittest
from nbxmpp.stringprep import nodeprep
from nbxmpp.stringprep import resourceprep
from nbxmpp.stringprep import nameprep
from nbxmpp.stringprep import check_bidi
class TestBidi(unittest.TestCase):
    '''Tests for :func:`check_bidi`.'''

    def test_empty_string(self):
        # Empty input must be accepted without raising.
        empty = ''
        check_bidi(empty)

    def test_L_RAL_violation(self):
        # U+05BE followed by 'A' is expected to be rejected.
        mixed = '\u05be\u0041'
        self.assertRaises(ValueError, check_bidi, mixed)
class TestNodeprep(unittest.TestCase):
    '''Tests for the :func:`nodeprep` profile.'''

    def test_map_to_nothing(self):
        prepared = nodeprep('I\u00ADX')
        self.assertEqual(
            'ix',
            prepared,
            'Nodeprep requirement: map SOFT HYPHEN to nothing')

    def test_case_fold(self):
        prepared = nodeprep('ßA')
        self.assertEqual(
            'ssa',
            prepared,
            'Nodeprep requirement: map ß to ss, A to a')

    def test_nfkc(self):
        # (expected output, input) pairs, checked in order.
        cases = [
            ('a', '\u00AA'),
            ('ix', '\u2168'),
        ]
        for expected, raw in cases:
            self.assertEqual(
                expected,
                nodeprep(raw),
                'Nodeprep requirement: NFKC')

    def test_prohibited_character(self):
        # (input, pattern expected in the error text, failure message)
        cases = [
            ('\u0007', r'U\+0007',
             'Nodeprep requirement: prohibited character (C.2.1)'),
            ('\u200E', r'U\+200e',
             'Nodeprep requirement: prohibited character (C.8)'),
            ('>', r'U\+003e',
             'Nodeprep requirement: prohibited character (custom)'),
        ]
        for raw, pattern, message in cases:
            with self.assertRaisesRegex(ValueError, pattern, msg=message):
                nodeprep(raw)

    def test_unassigned(self):
        unassigned = '\u0221'

        with self.assertRaises(
                ValueError,
                msg='Nodeprep requirement: unassigned'):
            nodeprep(unassigned, allow_unassigned=False)

        with self.assertRaises(
                ValueError,
                msg='enforce no unassigned by default'):
            nodeprep(unassigned)

        self.assertEqual(
            '\u0221',
            nodeprep(unassigned, allow_unassigned=True))
class TestNameprep(unittest.TestCase):
    '''Tests for the :func:`nameprep` profile.'''

    def test_map_to_nothing(self):
        self.assertEqual(
            'ix',
            nameprep('I\u00ADX'),
            'Nameprep requirement: map SOFT HYPHEN to nothing')

    def test_case_fold(self):
        self.assertEqual(
            'ssa',
            nameprep('ßA'),
            'Nameprep requirement: map ß to ss, A to a')

    def test_nfkc(self):
        # These assertions must exercise nameprep itself; they previously
        # called nodeprep, leaving Nameprep's NFKC step untested.
        self.assertEqual(
            'a',
            nameprep('\u00AA'),
            'Nameprep requirement: NFKC')
        self.assertEqual(
            'ix',
            nameprep('\u2168'),
            'Nameprep requirement: NFKC')

    def test_prohibited_character(self):
        # One representative code point per prohibited table; the error
        # text must name the offending code point in lowercase hex.
        with self.assertRaisesRegex(
                ValueError,
                r'U\+06dd',
                msg='Nameprep requirement: prohibited character (C.2.2)'):
            nameprep('\u06DD')

        with self.assertRaisesRegex(
                ValueError,
                r'U\+e000',
                msg='Nameprep requirement: prohibited character (C.3)'):
            nameprep('\uE000')

        with self.assertRaisesRegex(
                ValueError,
                r'U\+1fffe',
                msg='Nameprep requirement: prohibited character (C.4)'):
            nameprep('\U0001FFFE')

        with self.assertRaisesRegex(
                ValueError,
                r'U\+d800',
                msg='Nameprep requirement: prohibited character (C.5)'):
            nameprep('\uD800')

        with self.assertRaisesRegex(
                ValueError,
                r'U\+fff9',
                msg='Nameprep requirement: prohibited character (C.6)'):
            nameprep('\uFFF9')

        with self.assertRaisesRegex(
                ValueError,
                r'U\+2ff0',
                msg='Nameprep requirement: prohibited character (C.7)'):
            nameprep('\u2FF0')

        with self.assertRaisesRegex(
                ValueError,
                r'U\+e0001',
                msg='Nameprep requirement: prohibited character (C.9)'):
            nameprep('\U000E0001')

    def test_unassigned(self):
        with self.assertRaises(
                ValueError,
                msg='Nameprep requirement: unassigned'):
            nameprep('\u0221', allow_unassigned=False)

        with self.assertRaises(
                ValueError,
                msg='enforce no unassigned by default'):
            nameprep('\u0221')

        self.assertEqual(
            '\u0221',
            nameprep('\u0221', allow_unassigned=True))
class TestResourceprep(unittest.TestCase):
    '''Tests for the :func:`resourceprep` profile.'''

    def test_map_to_nothing(self):
        prepared = resourceprep('I\u00ADX')
        self.assertEqual(
            'IX',
            prepared,
            'Resourceprep requirement: map SOFT HYPHEN to nothing')

    def test_nfkc(self):
        # (expected output, input) pairs, checked in order.
        cases = [
            ('a', '\u00AA'),
            ('IX', '\u2168'),
        ]
        for expected, raw in cases:
            self.assertEqual(
                expected,
                resourceprep(raw),
                'Resourceprep requirement: NFKC')

    def test_prohibited_character(self):
        # (input, pattern expected in the error text, failure message)
        cases = [
            ('\u0007', r'U\+0007',
             'Resourceprep requirement: '
             'prohibited character (C.2.1)'),
            ('\u200E', r'U\+200e',
             'Resourceprep requirement: '
             'prohibited character (C.8)'),
        ]
        for raw, pattern, message in cases:
            with self.assertRaisesRegex(ValueError, pattern, msg=message):
                resourceprep(raw)

    def test_unassigned(self):
        unassigned = '\u0221'

        with self.assertRaises(
                ValueError,
                msg='Resourceprep requirement: unassigned'):
            resourceprep(unassigned, allow_unassigned=False)

        with self.assertRaises(
                ValueError,
                msg='enforce no unassigned by default'):
            resourceprep(unassigned)

        self.assertEqual(
            '\u0221',
            resourceprep(unassigned, allow_unassigned=True))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment