Spaces:
Running
Running
# Copyright 2016-present MongoDB, Inc. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
# Some changes copyright 2021-present Matthias Valvekens, | |
# licensed under the license of the pyHanko project (see LICENSE file). | |
"""An implementation of RFC4013 SASLprep.""" | |
__all__ = ["saslprep"] | |
import stringprep | |
import unicodedata | |
from typing import Callable, Tuple | |
from pdf2zh.pdfexceptions import PDFValueError | |
# RFC4013 section 2.3 prohibited output. | |
_PROHIBITED: Tuple[Callable[[str], bool], ...] = ( | |
# A strict reading of RFC 4013 requires table c12 here, but | |
# characters from it are mapped to SPACE in the Map step. Can | |
# normalization reintroduce them somehow? | |
stringprep.in_table_c12, | |
stringprep.in_table_c21_c22, | |
stringprep.in_table_c3, | |
stringprep.in_table_c4, | |
stringprep.in_table_c5, | |
stringprep.in_table_c6, | |
stringprep.in_table_c7, | |
stringprep.in_table_c8, | |
stringprep.in_table_c9, | |
) | |
def saslprep(data: str, prohibit_unassigned_code_points: bool = True) -> str: | |
"""An implementation of RFC4013 SASLprep. | |
:param data: | |
The string to SASLprep. | |
:param prohibit_unassigned_code_points: | |
RFC 3454 and RFCs for various SASL mechanisms distinguish between | |
`queries` (unassigned code points allowed) and | |
`stored strings` (unassigned code points prohibited). Defaults | |
to ``True`` (unassigned code points are prohibited). | |
:return: The SASLprep'ed version of `data`. | |
""" | |
if prohibit_unassigned_code_points: | |
prohibited = _PROHIBITED + (stringprep.in_table_a1,) | |
else: | |
prohibited = _PROHIBITED | |
# RFC3454 section 2, step 1 - Map | |
# RFC4013 section 2.1 mappings | |
# Map Non-ASCII space characters to SPACE (U+0020). Map | |
# commonly mapped to nothing characters to, well, nothing. | |
in_table_c12 = stringprep.in_table_c12 | |
in_table_b1 = stringprep.in_table_b1 | |
data = "".join( | |
[ | |
"\u0020" if in_table_c12(elt) else elt | |
for elt in data | |
if not in_table_b1(elt) | |
], | |
) | |
# RFC3454 section 2, step 2 - Normalize | |
# RFC4013 section 2.2 normalization | |
data = unicodedata.ucd_3_2_0.normalize("NFKC", data) | |
in_table_d1 = stringprep.in_table_d1 | |
if in_table_d1(data[0]): | |
if not in_table_d1(data[-1]): | |
# RFC3454, Section 6, #3. If a string contains any | |
# RandALCat character, the first and last characters | |
# MUST be RandALCat characters. | |
raise PDFValueError("SASLprep: failed bidirectional check") | |
# RFC3454, Section 6, #2. If a string contains any RandALCat | |
# character, it MUST NOT contain any LCat character. | |
prohibited = prohibited + (stringprep.in_table_d2,) | |
else: | |
# RFC3454, Section 6, #3. Following the logic of #3, if | |
# the first character is not a RandALCat, no other character | |
# can be either. | |
prohibited = prohibited + (in_table_d1,) | |
# RFC3454 section 2, step 3 and 4 - Prohibit and check bidi | |
for char in data: | |
if any(in_table(char) for in_table in prohibited): | |
raise PDFValueError("SASLprep: failed prohibited character check") | |
return data | |