Skip to content

Commit 46d5a74

Browse files
committed
Parse newline, UTF-8, trailing comment, backslash
This adds support for:

* multiline values (i.e. containing newlines or escaped \n), fixes theskumar#89
* backslashes in values, fixes theskumar#112
* trailing comments, fixes theskumar#141
* UTF-8 in unquoted values, fixes theskumar#147

Parsing is no longer line-based. That's why `parse_line` was replaced by `parse_binding`. Thanks to the previous commit, users of `parse_stream` don't have to deal with this change.

This supersedes a previous pull request, theskumar#142, which would have added support for multiline values in `Dotenv.parse` but not in the CLI (`dotenv get` and `dotenv set`).

The key-value binding regular expression was inspired by https://github.com/bkeepers/dotenv/blob/d749366b6009126b115fb7b63e0509566365859a/lib/dotenv/parser.rb#L14-L30

Parsing of escapes was fixed thanks to https://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python/24519338#24519338
1 parent 04df286 commit 46d5a74

3 files changed

Lines changed: 103 additions & 42 deletions

File tree

dotenv/main.py

Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,44 +15,76 @@
1515

1616
from .compat import StringIO
1717

18-
__escape_decoder = codecs.getdecoder('unicode_escape')
1918
__posix_variable = re.compile(r'\$\{[^\}]*\}')
2019

21-
Binding = namedtuple('Binding', 'key value original')
20+
_binding = re.compile(
21+
r"""
22+
(
23+
\s* # leading whitespace
24+
(?:export\s+)? # export
2225
26+
( '[\w]+' # single-quoted key
27+
| [\w]+ # or unquoted key
28+
)?
2329
24-
def decode_escaped(escaped):
25-
return __escape_decoder(escaped)[0]
30+
(?:\s*=\s*)? # equal sign
2631
32+
( '(?:\\'|[^'])*' # single-quoted value
33+
| "(?:\\"|[^"])*" # or double-quoted value
34+
| [^\#\r\n]+ # or unquoted value
35+
)?
2736
28-
def parse_line(line):
29-
line = line.strip()
37+
\s* # trailing whitespace
38+
(?:\#.*)? # comment
39+
\s* # trailing whitespace
40+
(?:\r|\n|\r\n)? # newline
41+
)
42+
""",
43+
re.MULTILINE | re.VERBOSE,
44+
)
3045

31-
# Ignore lines with `#` or which doesn't have `=` in it.
32-
if not line or line.startswith('#') or '=' not in line:
33-
return None, None
46+
_escape_sequence = re.compile(r"\\[\\'\"abfnrtv]")
3447

35-
k, v = line.split('=', 1)
3648

37-
if k.startswith('export '):
38-
(_, _, k) = k.partition('export ')
49+
Binding = namedtuple('Binding', 'key value original')
3950

40-
# Remove any leading and trailing spaces in key, value
41-
k, v = k.strip(), v.strip()
4251

43-
if v:
44-
v = v.encode('unicode-escape').decode('ascii')
45-
quoted = v[0] == v[-1] in ['"', "'"]
46-
if quoted:
47-
v = decode_escaped(v[1:-1])
52+
def decode_escapes(string):
    """Expand the backslash escapes matched by `_escape_sequence` in `string`.

    Only the sequences that pattern recognises are decoded, one match at a
    time; all other text is passed through unchanged.
    """
    return _escape_sequence.sub(
        lambda found: codecs.decode(found.group(0), 'unicode-escape'),
        string,
    )
57+
4858

49-
return k, v
59+
def is_surrounded_by(string, char):
    """Return True when `string` both starts and ends with `char`.

    At least two characters are required, so a lone quote character is not
    considered to be surrounding anything.
    """
    if len(string) < 2:
        return False
    first, last = string[0], string[-1]
    return first == char and last == char
64+
65+
66+
def parse_binding(string, position):
    """Parse one binding from `string`, starting at index `position`.

    Returns a `(Binding, end)` tuple, where `end` is the index at which the
    match stopped. The binding's `original` is always the matched text.
    When either the key or the value is missing, both `key` and `value` are
    reported as None.
    """
    found = _binding.match(string, position)
    original, key, value = found.groups()
    end = found.end()

    # Anything lacking a key or a value is not a usable binding.
    if key is None or value is None:
        return (Binding(key=None, value=None, original=original), end)

    if is_surrounded_by(value, "'") or is_surrounded_by(value, '"'):
        # Quoted: drop the surrounding quotes, then expand escape sequences.
        parsed = decode_escapes(value[1:-1])
    else:
        # Unquoted: only trim surrounding whitespace.
        parsed = value.strip()
    return (Binding(key=key, value=parsed, original=original), end)
5079

5180

5281
def parse_stream(stream):
53-
for line in stream:
54-
(key, value) = parse_line(line)
55-
yield Binding(key=key, value=value, original=line)
82+
string = stream.read()
83+
position = 0
84+
length = len(string)
85+
while position < length:
86+
(binding, position) = parse_binding(string, position)
87+
yield binding
5688

5789

5890
class DotEnv():

tests/test_cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ def test_set_key(dotenv_file):
3939
with open(dotenv_file, 'r') as fp:
4040
assert 'HELLO="WORLD 2"\nfoo="bar"' == fp.read().strip()
4141

42+
success, key_to_set, value_to_set = dotenv.set_key(dotenv_file, "HELLO", "WORLD\n3")
43+
44+
with open(dotenv_file, "r") as fp:
45+
assert 'HELLO="WORLD\n3"\nfoo="bar"' == fp.read().strip()
46+
4247

4348
def test_set_key_permission_error(dotenv_file):
4449
os.chmod(dotenv_file, 0o000)

tests/test_core.py

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import sh
1010

1111
from dotenv import load_dotenv, find_dotenv, set_key, dotenv_values
12-
from dotenv.main import Binding, parse_line, parse_stream
12+
from dotenv.main import Binding, parse_stream
1313
from dotenv.compat import StringIO
1414
from IPython.terminal.embed import InteractiveShellEmbed
1515

@@ -24,34 +24,58 @@ def restore_os_environ():
2424
os.environ.update(environ)
2525

2626

27-
@pytest.mark.parametrize("test_input,expected", [
28-
("a=b", ("a", "b")),
29-
(" a = b ", ("a", "b")),
30-
("export a=b", ("a", "b")),
31-
(" export 'a'=b", ("'a'", "b")),
32-
(" export 'a'=b", ("'a'", "b")),
33-
("# a=b", (None, None)),
34-
("# a=b", (None, None)),
35-
("a=b space ", ('a', 'b space')),
36-
("a='b space '", ('a', 'b space ')),
37-
('a="b space "', ('a', 'b space ')),
38-
("export export_spam=1", ("export_spam", "1")),
39-
("export port=8000", ("port", "8000")),
40-
])
41-
def test_parse_line(test_input, expected):
42-
assert parse_line(test_input) == expected
43-
44-
4527
@pytest.mark.parametrize("test_input,expected", [
4628
("", []),
4729
("a=b", [Binding(key="a", value="b", original="a=b")]),
30+
(" a = b ", [Binding(key="a", value="b", original=" a = b ")]),
31+
("export a=b", [Binding(key="a", value="b", original="export a=b")]),
32+
(" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]),
33+
(" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]),
34+
("# a=b", [Binding(key=None, value=None, original="# a=b")]),
35+
('a=b # comment', [Binding(key="a", value="b", original="a=b # comment")]),
36+
("a=b space ", [Binding(key="a", value="b space", original="a=b space ")]),
37+
("a='b space '", [Binding(key="a", value="b space ", original="a='b space '")]),
38+
('a="b space "', [Binding(key="a", value="b space ", original='a="b space "')]),
39+
("export export_a=1", [Binding(key="export_a", value="1", original="export export_a=1")]),
40+
("export port=8000", [Binding(key="port", value="8000", original="export port=8000")]),
41+
('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]),
42+
("a='b\nc'", [Binding(key="a", value="b\nc", original="a='b\nc'")]),
43+
('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]),
44+
('a="b\\nc"', [Binding(key="a", value='b\nc', original='a="b\\nc"')]),
45+
('a="b\\"c"', [Binding(key="a", value='b"c', original='a="b\\"c"')]),
46+
("a='b\\'c'", [Binding(key="a", value="b'c", original="a='b\\'c'")]),
47+
("a=à", [Binding(key="a", value="à", original="a=à")]),
48+
('a="à"', [Binding(key="a", value="à", original='a="à"')]),
49+
('garbage', [Binding(key=None, value=None, original="garbage")]),
4850
(
4951
"a=b\nc=d",
5052
[
5153
Binding(key="a", value="b", original="a=b\n"),
5254
Binding(key="c", value="d", original="c=d"),
5355
],
5456
),
57+
(
58+
"a=b\r\nc=d",
59+
[
60+
Binding(key="a", value="b", original="a=b\r\n"),
61+
Binding(key="c", value="d", original="c=d"),
62+
],
63+
),
64+
(
65+
'a="\nb=c',
66+
[
67+
Binding(key="a", value='"', original='a="\n'),
68+
Binding(key="b", value='c', original="b=c"),
69+
]
70+
),
71+
(
72+
'# comment\na="b\nc"\nd=e\n',
73+
[
74+
Binding(key=None, value=None, original="# comment\n"),
75+
Binding(key="a", value="b\nc", original='a="b\nc"\n'),
76+
Binding(key="d", value="e", original="d=e\n"),
77+
],
78+
),
5579
])
5680
def test_parse_stream(test_input, expected):
5781
result = parse_stream(StringIO(test_input))

0 commit comments

Comments
 (0)