diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b0728d..6fe68a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +### 1.0.3 + +* Fix UCS-2 encoding: fixes #49 and #53 + ### 1.0.2 * Add `tox.ini`, support `2.6`, `2.7`, `3.4`, `3.5`, `3.6` and `3.7` diff --git a/setup.py b/setup.py index 210a03d..f6afaa3 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='python-smpplib', - version='1.0.2', + version='1.0.3', url='https://github.com/podshumok/python-smpplib', description='SMPP library for python', long_description=open('README.md', 'rt').read(), diff --git a/smpplib/consts.py b/smpplib/consts.py index 3b73a71..b785d00 100644 --- a/smpplib/consts.py +++ b/smpplib/consts.py @@ -1,16 +1,18 @@ -import six - -EMPTY_STRING = six.b('') -NULL_STRING = six.b('\0') +EMPTY_STRING = b'' +NULL_STRING = b'\0' # Message part lengths in different encodings. -SEVENBIT_SIZE = 160 -EIGHTBIT_SIZE = 140 -UCS2_SIZE = 70 -SEVENBIT_MP_SIZE = SEVENBIT_SIZE - 7 -EIGHTBIT_MP_SIZE = EIGHTBIT_SIZE - 6 -UCS2_MP_SIZE = UCS2_SIZE - 3 +# SMPP 3.4, 2.2.1.2 +SEVENBIT_LENGTH = 160 +EIGHTBIT_LENGTH = 140 +UCS2_LENGTH = 70 + +MULTIPART_HEADER_SIZE = 6 + +SEVENBIT_PART_SIZE = SEVENBIT_LENGTH - 7 # TODO: where does 7 come from? +EIGHTBIT_PART_SIZE = 140 - MULTIPART_HEADER_SIZE +UCS2_PART_SIZE = 140 - MULTIPART_HEADER_SIZE # must be an even number anyway # SMPP error codes. diff --git a/smpplib/gsm.py b/smpplib/gsm.py index 8a78e7b..f820606 100644 --- a/smpplib/gsm.py +++ b/smpplib/gsm.py @@ -10,19 +10,21 @@ def make_parts(text, encoding=consts.SMPP_ENCODING_DEFAULT): """Returns tuple(parts, encoding, esm_class)""" try: # Try to encode with the user-defined encoding first. - encode, split_size, part_size = ENCODINGS[encoding] + encode, split_length, part_size = ENCODINGS[encoding] encoded_text = encode(text) except KeyError: raise NotImplementedError('encoding is not supported: %s' % encoding) except UnicodeError: # Fallback to UCS-2. encoding = consts.SMPP_ENCODING_ISO10646 - encode, split_size, part_size = ENCODINGS[encoding] + encode, split_length, part_size = ENCODINGS[encoding] encoded_text = encode(text) - if len(text) > split_size: + if len(text) > split_length: # Split the text into well-formed parts. esm_class = consts.SMPP_GSMFEAT_UDHI + # FIXME: 7-bit encoding has variable-length characters. + # FIXME: it means that a character may be broken by splitting. parts = make_parts_encoded(encoded_text, part_size) else: # Normal message. @@ -56,9 +58,9 @@ def gsm_encode(plaintext): # Map GSM encoding into a tuple of encode function, maximum single message size and a part size. # Add new entry here should you need to use another encoding. ENCODINGS = { - consts.SMPP_ENCODING_DEFAULT: (gsm_encode, consts.SEVENBIT_SIZE, consts.SEVENBIT_MP_SIZE), - consts.SMPP_ENCODING_ISO88591: (lambda text: text.encode('iso-8859-1'), consts.EIGHTBIT_SIZE, consts.EIGHTBIT_MP_SIZE), - consts.SMPP_ENCODING_ISO10646: (lambda text: text.encode('utf-16-be'), consts.UCS2_SIZE, consts.UCS2_MP_SIZE), + consts.SMPP_ENCODING_DEFAULT: (gsm_encode, consts.SEVENBIT_LENGTH, consts.SEVENBIT_PART_SIZE), + consts.SMPP_ENCODING_ISO88591: (lambda text: text.encode('iso-8859-1'), consts.EIGHTBIT_LENGTH, consts.EIGHTBIT_PART_SIZE), + consts.SMPP_ENCODING_ISO10646: (lambda text: text.encode('utf-16-be'), consts.UCS2_LENGTH, consts.UCS2_PART_SIZE), } diff --git a/tests/test_gsm.py b/tests/test_gsm.py index 6411a20..b6641fd 100644 --- a/tests/test_gsm.py +++ b/tests/test_gsm.py @@ -1,13 +1,13 @@ # -*- coding: utf8 -*- import mock -import pytest +from pytest import mark, raises from smpplib import consts from smpplib.gsm import gsm_encode, make_parts, make_parts_encoded -@pytest.mark.parametrize('plaintext, encoded_text', [ +@mark.parametrize('plaintext, encoded_text', [ (u'@', b'\x00'), (u'^', b'\x1B\x14'), ]) @@ -15,15 +15,15 @@ def test_gsm_encode(plaintext, encoded_text): assert gsm_encode(plaintext) == encoded_text -@pytest.mark.parametrize('plaintext', [ +@mark.parametrize('plaintext', [ (u'Ая',), ]) def test_gsm_encode_unicode_error(plaintext): - with pytest.raises(UnicodeError): + with raises(UnicodeError): gsm_encode(plaintext) -@pytest.mark.parametrize('plaintext, encoding, expected_parts, expected_encoding', [ +@mark.parametrize('plaintext, encoding, expected_parts, expected_encoding', [ (u'@', consts.SMPP_ENCODING_DEFAULT, [b'\x00'], consts.SMPP_ENCODING_DEFAULT), (u'Ая', consts.SMPP_ENCODING_DEFAULT, [b'\x04\x10\x04O'], consts.SMPP_ENCODING_ISO10646), (u'é', consts.SMPP_ENCODING_ISO88591, [b'\xe9'], consts.SMPP_ENCODING_ISO88591), @@ -32,10 +32,10 @@ def test_make_parts_single(plaintext, encoding, expected_parts, expected_encodin assert make_parts(plaintext, encoding) == (expected_parts, expected_encoding, consts.SMPP_MSGTYPE_DEFAULT) -@pytest.mark.parametrize('plaintext, expected', [ - (u'@' * consts.SEVENBIT_MP_SIZE * 2, [ - b'\x05\x00\x03\x42\x02\x01' + b'\x00' * consts.SEVENBIT_MP_SIZE, - b'\x05\x00\x03\x42\x02\x02' + b'\x00' * consts.SEVENBIT_MP_SIZE, +@mark.parametrize('plaintext, expected', [ + (u'@' * consts.SEVENBIT_PART_SIZE * 2, [ + b'\x05\x00\x03\x42\x02\x01' + b'\x00' * consts.SEVENBIT_PART_SIZE, + b'\x05\x00\x03\x42\x02\x02' + b'\x00' * consts.SEVENBIT_PART_SIZE, ]), ]) def test_make_parts_multiple(plaintext, expected): @@ -44,7 +44,7 @@ def test_make_parts_multiple(plaintext, expected): assert make_parts(plaintext) == (expected, consts.SMPP_ENCODING_DEFAULT, consts.SMPP_GSMFEAT_UDHI) -@pytest.mark.parametrize('encoded_text, part_size, expected', [ +@mark.parametrize('encoded_text, part_size, expected', [ (b'12345', 5, [b'\x05\x00\x03\x42\x01\x0112345']), (b'12345', 2, [b'\x05\x00\x03\x42\x03\x0112', b'\x05\x00\x03\x42\x03\x0234', b'\x05\x00\x03\x42\x03\x035']), ]) @@ -52,3 +52,11 @@ def test_make_parts_encoded(encoded_text, part_size, expected): with mock.patch('random.randint') as randint: randint.return_value = 0x42 assert make_parts_encoded(encoded_text, part_size) == expected + + +@mark.parametrize('text, expected', [ + (u'Привет мир!\n' * 10, 2), +]) +def test_part_number(text, expected): + parts, _, _ = make_parts(text) + assert len(parts) == expected