Skip to content

Commit b2cedd0

Browse files
author
Tomaz Solc
committed
Initial import.
0 parents  commit b2cedd0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

186 files changed

+340
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
build

perl2python.pl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
use Getopt::Long;
2+
3+
my $input = ".";
4+
my $output = ".";
5+
6+
$result = GetOptions("input=s" => \$input,
7+
"output=s" => \$output);
8+
9+
sub python_escape {
10+
my $x = shift;
11+
12+
return '' unless defined($x);
13+
14+
$x =~ s/\\/\\\\/gs;
15+
$x =~ s/'/\\'/gs;
16+
$x =~ s/([\x00-\x1f])/sprintf("\\x%02x", ord($1))/ges;
17+
18+
return $x;
19+
}
20+
21+
push(@INC, $input);
22+
23+
my $n;
24+
for($n = 0; $n < 256; $n++) {
25+
26+
eval( sprintf("require x%02x;\n", $n) );
27+
28+
next unless( $#{$Text::Unidecode::Char[$n]} >= 0 );
29+
30+
open(PYTHON, sprintf(">%s/x%02x.py", $output, $n));
31+
print PYTHON "data = (";
32+
33+
my $first = 1;
34+
for my $t (@{$Text::Unidecode::Char[$n]}) {
35+
if( $first ) {
36+
$first = 0;
37+
} else {
38+
print PYTHON ", ";
39+
}
40+
print PYTHON "'", &python_escape($t), "'";
41+
}
42+
43+
print PYTHON ")\n";
44+
close(PYTHON)
45+
}

setup.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/python
2+
3+
from distutils.core import Command, setup
4+
import unittest
5+
6+
UNITTESTS = [
7+
"tests.basic",
8+
]
9+
10+
class TestCommand(Command):
11+
user_options = [ ]
12+
13+
def initialize_options(self):
14+
pass
15+
16+
def finalize_options(self):
17+
pass
18+
19+
def run(self):
20+
suite = unittest.TestSuite()
21+
22+
suite.addTests(
23+
unittest.defaultTestLoader.loadTestsFromNames(
24+
UNITTESTS ) )
25+
26+
result = unittest.TextTestRunner(verbosity=2).run(suite)
27+
28+
setup(name='Unidecode',
29+
version='0.04.1',
30+
description='US-ASCII transliterations of Unicode text',
31+
author='Tomaz Solc',
32+
author_email='[email protected]',
33+
34+
packages = [ 'unidecode' ],
35+
36+
provides = [ 'unidecode' ],
37+
38+
cmdclass = { 'test': TestCommand }
39+
)

tests/__init__.py

Whitespace-only changes.

tests/basic.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# -*- coding: utf-8 -*-
2+
import unittest
3+
from unidecode import unidecode
4+
5+
class TestUnidecode(unittest.TestCase):
6+
def test_ascii(self):
7+
for n in xrange(0,128):
8+
t = chr(n)
9+
10+
self.failUnlessEqual(unidecode(t), t)
11+
12+
def test_specific(self):
13+
14+
TESTS = [
15+
(u"Hello, World!",
16+
"Hello, World!"),
17+
18+
(u"'\"\r\n",
19+
"'\"\r\n"),
20+
21+
(u"ČŽŠčžš",
22+
"CZSczs"),
23+
24+
(u"ア",
25+
"a"),
26+
27+
(u"α",
28+
"a"),
29+
30+
(u"а",
31+
"a"),
32+
]
33+
34+
for input, output in TESTS:
35+
self.failUnlessEqual(unidecode(input), output)

unidecode/__init__.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
Char = {}
2+
3+
NULLMAP = [ '' * 0x100 ]
4+
5+
def unidecode(string):
6+
retval = []
7+
8+
for char in string:
9+
o = ord(char)
10+
11+
if o < 0x80:
12+
retval.append(char)
13+
continue
14+
15+
h = o >> 8
16+
l = o & 0xff
17+
18+
c = Char.get(h, None)
19+
20+
if c == None:
21+
try:
22+
mod = __import__('unidecode.x%02x'%(h), [], [], ['data'])
23+
except ImportError:
24+
Char[h] = NULLMAP
25+
retval.append('')
26+
continue
27+
28+
Char[h] = mod.data
29+
30+
try:
31+
retval.append( mod.data[l] )
32+
except IndexError:
33+
retval.append( '' )
34+
else:
35+
try:
36+
retval.append( c[l] )
37+
except IndexError:
38+
retval.append( '' )
39+
40+
return ''.join(retval)

unidecode/x00.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data = ('\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ']', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ' ', '!', 'C/', 'PS', '$?', 'Y=', '|', 'SS', '"', '(c)', 'a', '<<', '!', '', '(r)', '-', 'deg', '+-', '2', '3', '\'', 'u', 'P', '*', ',', '1', 'o', '>>', '1/4', '1/2', '3/4', '?', 'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss', 'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 'd', 'n', 'o', 'o', 'o', 'o', 'o', '/', 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y')

unidecode/x01.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data = ('A', 'a', 'A', 'a', 'A', 'a', 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g', 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'IJ', '', 'J', 'j', 'K', 'k', 'k', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'N', 'n', 'N', 'n', 'N', 'n', '\'n', 'ng', 'NG', 'O', 'o', 'O', 'o', 'O', 'o', 'OE', 'oe', 'R', 'r', 'R', 'r', 'R', 'r', 'S', 's', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w', 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's', 'b', 'B', 'B', 'b', '6', '6', 'O', 'C', 'c', 'D', 'D', 'D', 'd', 'd', '3', '@', 'E', 'F', 'f', 'G', 'G', 'hv', 'I', 'I', 'K', 'k', 'l', 'l', 'W', 'N', 'n', 'O', 'O', 'o', 'OI', 'oi', 'P', 'p', 'YR', '2', '2', 'SH', 'sh', 't', 'T', 't', 'T', 'U', 'u', 'Y', 'V', 'Y', 'y', 'Z', 'z', 'ZH', 'ZH', 'zh', 'zh', '2', '5', '5', 'ts', 'w', '|', '||', '|=', '!', 'DZ', 'Dz', 'dz', 'LJ', 'Lj', 'lj', 'NJ', 'Nj', 'nj', 'A', 'a', 'I', 'i', 'O', 'o', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', '@', 'A', 'a', 'A', 'a', 'AE', 'ae', 'G', 'g', 'G', 'g', 'K', 'k', 'O', 'o', 'O', 'o', 'ZH', 'zh', 'j', 'DZ', 'D', 'dz', 'G', 'g', 'HV', 'W', 'N', 'n', 'A', 'a', 'AE', 'ae', 'O', 'o')

unidecode/x02.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data = ('A', 'a', 'A', 'a', 'E', 'e', 'E', 'e', 'I', 'i', 'I', 'i', 'O', 'o', 'O', 'o', 'R', 'r', 'R', 'r', 'U', 'u', 'U', 'u', 'S', 's', 'T', 't', 'Y', 'y', 'H', 'h', '[?]', '[?]', 'OU', 'ou', 'Z', 'z', 'A', 'a', 'E', 'e', 'O', 'o', 'O', 'o', 'O', 'o', 'O', 'o', 'Y', 'y', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', 'a', 'a', 'a', 'b', 'o', 'c', 'd', 'd', 'e', '@', '@', 'e', 'e', 'e', 'e', 'j', 'g', 'g', 'g', 'g', 'u', 'Y', 'h', 'h', 'i', 'i', 'I', 'l', 'l', 'l', 'lZ', 'W', 'W', 'm', 'n', 'n', 'n', 'o', 'OE', 'O', 'F', 'R', 'R', 'R', 'R', 'r', 'r', 'R', 'R', 'R', 's', 'S', 'j', 'S', 'S', 't', 't', 'U', 'U', 'v', '^', 'W', 'Y', 'Y', 'z', 'z', 'Z', 'Z', '?', '?', '?', 'C', '@', 'B', 'E', 'G', 'H', 'j', 'k', 'L', 'q', '?', '?', 'dz', 'dZ', 'dz', 'ts', 'tS', 'tC', 'fN', 'ls', 'lz', 'WW', ']]', '[?]', '[?]', 'k', 'h', 'j', 'r', 'r', 'r', 'r', 'w', 'y', '\'', '"', '`', '\'', '`', '`', '\'', '?', '?', '<', '>', '^', 'V', '^', 'V', '\'', '-', '/', '\\', ',', '_', '\\', '/', ':', '.', '`', '\'', '^', 'V', '+', '-', 'V', '.', '@', ',', '~', '"', 'R', 'X', 'G', 'l', 's', 'x', '?', '', '', '', '', '', '', '', 'V', '=', '"', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]')

unidecode/x03.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data = ('', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '', '', '', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '\'', ',', '[?]', '[?]', '[?]', '[?]', '', '[?]', '[?]', '[?]', '?', '[?]', '[?]', '[?]', '[?]', '[?]', '', '', 'A', ';', 'E', 'E', 'I', '[?]', 'O', '[?]', 'U', 'O', 'I', 'A', 'B', 'G', 'D', 'E', 'Z', 'E', 'Th', 'I', 'K', 'L', 'M', 'N', 'Ks', 'O', 'P', 'R', '[?]', 'S', 'T', 'U', 'Ph', 'Kh', 'Ps', 'O', 'I', 'U', 'a', 'e', 'e', 'i', 'u', 'a', 'b', 'g', 'd', 'e', 'z', 'e', 'th', 'i', 'k', 'l', 'm', 'n', 'x', 'o', 'p', 'r', 's', 's', 't', 'u', 'ph', 'kh', 'ps', 'o', 'i', 'u', 'o', 'u', 'o', '[?]', 'b', 'th', 'U', 'U', 'U', 'ph', 'p', '&', '[?]', '[?]', 'St', 'st', 'W', 'w', 'Q', 'q', 'Sp', 'sp', 'Sh', 'sh', 'F', 'f', 'Kh', 'kh', 'H', 'h', 'G', 'g', 'CH', 'ch', 'Ti', 'ti', 'k', 'r', 'c', 'j', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]', '[?]')

0 commit comments

Comments
 (0)