Source code for pymap.parsing.modutf7

"""Implements the modified UTF-7 specification used for encoding and decoding
mailbox names in IMAP.

See Also:
    `RFC 3501 5.1.3 <https://tools.ietf.org/html/rfc3501#section-5.1.3>`_

"""

from __future__ import annotations

import base64

__all__ = ['modutf7_encode', 'modutf7_decode']


def _modified_b64encode(src: str) -> bytes:
    # Inspired by Twisted Python's implementation:
    #   https://twistedmatrix.com/trac/browser/trunk/LICENSE
    src_utf7 = src.encode('utf-7')
    return src_utf7[1:-1].replace(b'/', b',')


def _modified_b64decode(src: bytes) -> str:
    # Inspired by Twisted Python's implementation:
    #   https://twistedmatrix.com/trac/browser/trunk/LICENSE
    src_utf7 = b'+%b-' % src.replace(b',', b'/')
    return src_utf7.decode('utf-7')


def modutf7_encode(data: str) -> bytes:
    """Encode the string using modified UTF-7.

    Printable US-ASCII characters (0x20-0x7e) are copied through unchanged,
    except ``&`` which is always written as the escape ``&-``. Every maximal
    run of other characters is written as ``&``, the run's modified base64
    encoding, and a closing ``-``.

    Args:
        data: The input string to encode.

    Returns:
        The modified UTF-7 encoded bytestring.

    """
    ret = bytearray()
    run_start = None  # index where the pending non-printable run began

    for i, symbol in enumerate(data):
        charpoint = ord(symbol)
        if not 0x20 <= charpoint <= 0x7e:
            # Extend (or open) the run that will be base64-encoded.
            if run_start is None:
                run_start = i
            continue
        if run_start is not None:
            # A printable character ends the pending run: flush it.
            ret.append(0x26)
            ret.extend(_modified_b64encode(data[run_start:i]))
            ret.append(0x2d)
            run_start = None
        # The '&' escape must apply even right after a flushed run,
        # otherwise a bare '&' would open a bogus shift sequence.
        if charpoint == 0x26:
            ret.extend(b'&-')
        else:
            ret.append(charpoint)

    if run_start is not None:
        # The input ended while still inside a run.
        ret.append(0x26)
        ret.extend(_modified_b64encode(data[run_start:]))
        ret.append(0x2d)
    return bytes(ret)
def modutf7_decode(data: bytes) -> str:
    """Decode the bytestring using modified UTF-7.

    Bytes outside a shift sequence are mapped one-to-one to the character
    with the same code point. ``&-`` decodes to a literal ``&``. Any other
    ``&`` opens a shift sequence whose modified base64 payload runs up to
    the next ``-``.

    Malformed input is handled leniently: a shift sequence that is never
    closed by ``-`` is treated as ending at the end of the input, and a
    dangling ``&`` at the very end of the input is taken literally.

    Args:
        data: The encoded bytestring to decode.

    Returns:
        The decoded string.

    """
    raw = bytes(data)
    size = len(raw)
    parts = []
    pos = 0
    while pos < size:
        amp = raw.find(b'&', pos)
        if amp < 0:
            amp = size
        if amp > pos:
            # latin-1 maps every byte to the code point of the same value,
            # i.e. the same result as chr(byte) for each byte.
            parts.append(raw[pos:amp].decode('latin-1'))
        if amp == size:
            break
        end = raw.find(b'-', amp + 1)
        if end < 0:
            # Unterminated shift sequence: consume the rest of the input
            # instead of scanning forever for a '-' that never comes.
            end = size
        if end == amp + 1:
            # "&-" escape, or a dangling '&' as the final byte.
            parts.append('&')
        else:
            parts.append(_modified_b64decode(raw[amp + 1:end]))
        pos = end + 1
    return ''.join(parts)