Skip to content

Commit 80a0d66

Browse files
committed
Added mathematical symbols in the SMP (Supplementary Multilingual Plane)
Included are various versions of the Latin and Greek alphabets and digits.
1 parent 5f1a7a9 commit 80a0d66

5 files changed

Lines changed: 1064 additions & 1 deletion

File tree

tests/basic_2.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,33 @@ def test_bmp(self):
1515
t = unichr(n)
1616
unidecode(t)
1717

18+
def test_mathematical_latin(self):
    # The range 0x1d400-0x1d6a3 holds 13 consecutive sequences of
    # A-Z, a-z (13 * 52 = 676 codepoints), with some codepoints
    # undefined. We just count the undefined ones and don't check
    # their positions.
    start = 0x1d400
    empty = 0
    for n in xrange(start, 0x1d6a4):
        # Position within the current 52-letter alphabet, measured from
        # the start of the range rather than from the raw codepoint, so
        # the expectation does not rely on 0x1d400 being a multiple of 52.
        pos = (n - start) % 52
        if pos < 26:
            a = chr(ord('A') + pos)
        else:
            a = chr(ord('a') + pos - 26)
        b = unidecode(unichr(n))

        if not b:
            empty += 1
        else:
            # Compare the transliteration itself; passing it through
            # unidecode() a second time would only weaken the check.
            self.failUnlessEqual(b, a)

    self.failUnlessEqual(empty, 24)
36+
37+
def test_mathematical_digits(self):
    # The range 0x1d7ce-0x1d7ff holds 5 consecutive sequences of 0-9.
    start = 0x1d7ce
    for n in xrange(start, 0x1d800):
        a = chr(ord('0') + (n - start) % 10)
        b = unidecode(unichr(n))

        # Compare the transliteration itself; passing it through
        # unidecode() a second time would only weaken the check.
        self.failUnlessEqual(b, a)
44+
1845
def test_specific(self):
1946

2047
TESTS = [
@@ -58,7 +85,11 @@ def test_specific(self):
5885

5986
# Non-BMP character
6087
(u'\U0001d5a0',
61-
''),
88+
'A'),
89+
90+
# Mathematical
91+
(u'\U0001d5c4\U0001d5c6/\U0001d5c1',
92+
'km/h'),
6293
]
6394

6495
for input, output in TESTS:

unidecode/x1d4.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
# 256-entry transliteration table, indexed 0x00-0xff (presumably the low
# byte of codepoints 0x1d400-0x1d4ff, going by the file name -- confirm
# against the table loader).
#
# The table is five runs of the Latin alphabet (A-Z followed by a-z); the
# last run stops at 'v' because the table ends at index 0xff. A '.' in the
# rows below is a placeholder for an entry that maps to the empty string.
data = tuple(
    '' if ch == '.' else ch
    for ch in (
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'  # 0x00-0x33
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg.ijklmnopqrstuvwxyz'  # 0x34-0x67
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'  # 0x68-0x9b
        'A.CD..G..JK..NOPQ.STUVWXYZabcd.f.hijklmn.pqrstuvwxyz'  # 0x9c-0xcf
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuv'      # 0xd0-0xff
    )
)

0 commit comments

Comments
 (0)