Source code for pymap.parsing.modutf7

"""Implements the modified UTF-7 specification used for encoding and decoding
mailbox names in IMAP.

See Also:
    `RFC 3501 5.1.3 <https://tools.ietf.org/html/rfc3501#section-5.1.3>`_

"""

from __future__ import annotations

__all__ = ['modutf7_encode', 'modutf7_decode']


def _modified_b64encode(src: str) -> bytes:
    # Inspired by Twisted Python's implementation:
    #   https://twistedmatrix.com/trac/browser/trunk/LICENSE
    src_utf7 = src.encode('utf-7')
    return src_utf7[1:-1].replace(b'/', b',')


def _modified_b64decode(src: bytes) -> str:
    # Inspired by Twisted Python's implementation:
    #   https://twistedmatrix.com/trac/browser/trunk/LICENSE
    src_utf7 = b'+%b-' % src.replace(b',', b'/')
    return src_utf7.decode('utf-7')



[docs]
def modutf7_encode(data: str) -> bytes:
    """Encode the string using modified UTF-7.

    Args:
        data: The input string to encode.

    """
    ret = bytearray()
    is_usascii = True
    encode_start = None
    for i, symbol in enumerate(data):
        charpoint = ord(symbol)
        if is_usascii:
            if charpoint == 0x26:
                ret.extend(b'&-')
            elif 0x20 <= charpoint <= 0x7e:
                ret.append(charpoint)
            else:
                encode_start = i
                is_usascii = False
        else:
            if 0x20 <= charpoint <= 0x7e:
                to_encode = data[encode_start:i]
                encoded = _modified_b64encode(to_encode)
                ret.append(0x26)
                ret.extend(encoded)
                ret.extend((0x2d, charpoint))
                is_usascii = True
    if not is_usascii:
        to_encode = data[encode_start:]
        encoded = _modified_b64encode(to_encode)
        ret.append(0x26)
        ret.extend(encoded)
        ret.append(0x2d)
    return bytes(ret)




[docs]
def modutf7_decode(data: bytes) -> str:
    """Decode the bytestring using modified UTF-7.

    Args:
        data: The encoded bytestring to decode.

    """
    parts = []
    is_usascii = True
    buf = memoryview(data)
    while buf:
        byte = buf[0]
        if is_usascii:
            if buf[0:2] == b'&-':
                parts.append('&')
                buf = buf[2:]
            elif byte == 0x26:
                is_usascii = False
                buf = buf[1:]
            else:
                parts.append(chr(byte))
                buf = buf[1:]
        else:
            for i, byte in enumerate(buf):
                if byte == 0x2d:
                    to_decode = buf[:i].tobytes()
                    decoded = _modified_b64decode(to_decode)
                    parts.append(decoded)
                    buf = buf[i + 1:]
                    is_usascii = True
                    break
    if not is_usascii:
        to_decode = buf.tobytes()
        decoded = _modified_b64decode(to_decode)
        parts.append(decoded)
    return ''.join(parts)