Skip to content

Commit d33366c

Browse files
bbc2 authored and theskumar committed
Add support for newlines, backslashes, trailing comments and unquoted UTF-8 (theskumar#148)
* Fix deprecation warning for POSIX variable regex

  This was also caught by Flake8 as:

      ./dotenv/main.py:19:2: W605 invalid escape sequence '\$'
      ./dotenv/main.py:19:4: W605 invalid escape sequence '\{'
      ./dotenv/main.py:19:8: W605 invalid escape sequence '\}'
      ./dotenv/main.py:19:12: W605 invalid escape sequence '\}'

* Turn get_stream into a context manager

  This avoids the use of the `is_file` class variable by abstracting away the difference between `StringIO` and a file stream.

* Deduplicate parsing code and abstract away lines

  Parsing .env files is a critical part of this package. To make it easier to change it and test it, it is important that it is done in only one place.

  Also, code that uses the parser now doesn't depend on the fact that each key-value binding spans exactly one line. This will make it easier to handle multiline bindings in the future.

* Parse newline, UTF-8, trailing comment, backslash

  This adds support for:

  * multiline values (i.e. containing newlines or escaped \n), fixes theskumar#89
  * backslashes in values, fixes theskumar#112
  * trailing comments, fixes theskumar#141
  * UTF-8 in unquoted values, fixes theskumar#147

  Parsing is no longer line-based. That's why `parse_line` was replaced by `parse_binding`. Thanks to the previous commit, users of `parse_stream` don't have to deal with this change.

  This supersedes a previous pull-request, theskumar#142, which would add support for multiline values in `Dotenv.parse` but not in the CLI (`dotenv get` and `dotenv set`).

  The key-value binding regular expression was inspired by https://github.com/bkeepers/dotenv/blob/d749366b6009126b115fb7b63e0509566365859a/lib/dotenv/parser.rb#L14-L30

  Parsing of escapes was fixed thanks to https://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python/24519338#24519338
1 parent 3b7e60e commit d33366c

File tree

3 files changed

+240
-97
lines changed

3 files changed

+240
-97
lines changed

dotenv/main.py

Lines changed: 110 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,90 @@
22
from __future__ import absolute_import, print_function, unicode_literals
33

44
import codecs
5-
import fileinput
65
import io
76
import os
87
import re
8+
import shutil
99
import sys
1010
from subprocess import Popen
11+
import tempfile
1112
import warnings
12-
from collections import OrderedDict
13+
from collections import OrderedDict, namedtuple
14+
from contextlib import contextmanager
1315

1416
from .compat import StringIO, PY2, WIN, text_type
1517

16-
__escape_decoder = codecs.getdecoder('unicode_escape')
17-
__posix_variable = re.compile('\$\{[^\}]*\}') # noqa
18+
__posix_variable = re.compile(r'\$\{[^\}]*\}')
1819

20+
_binding = re.compile(
21+
r"""
22+
(
23+
\s* # leading whitespace
24+
(?:export\s+)? # export
1925
20-
def decode_escaped(escaped):
21-
return __escape_decoder(escaped)[0]
26+
( '[^']+' # single-quoted key
27+
| [^=\#\s]+ # or unquoted key
28+
)?
2229
30+
(?:
31+
(?:\s*=\s*) # equal sign
2332
24-
def parse_line(line):
25-
line = line.strip()
33+
( '(?:\\'|[^'])*' # single-quoted value
34+
| "(?:\\"|[^"])*" # or double-quoted value
35+
| [^\#\r\n]* # or unquoted value
36+
)
37+
)?
2638
27-
# Ignore lines with `#` or which doesn't have `=` in it.
28-
if not line or line.startswith('#') or '=' not in line:
29-
return None, None
39+
\s* # trailing whitespace
40+
(?:\#[^\r\n]*)? # comment
41+
(?:\r|\n|\r\n)? # newline
42+
)
43+
""",
44+
re.MULTILINE | re.VERBOSE,
45+
)
3046

31-
k, v = line.split('=', 1)
47+
_escape_sequence = re.compile(r"\\[\\'\"abfnrtv]")
3248

33-
if k.startswith('export '):
34-
(_, _, k) = k.partition('export ')
3549

36-
# Remove any leading and trailing spaces in key, value
37-
k, v = k.strip(), v.strip()
50+
Binding = namedtuple('Binding', 'key value original')
3851

39-
if v:
40-
v = v.encode('unicode-escape').decode('ascii')
41-
quoted = v[0] == v[-1] in ['"', "'"]
42-
if quoted:
43-
v = decode_escaped(v[1:-1])
4452

45-
return k, v
53+
def decode_escapes(string):
    """Return *string* with its recognised backslash escapes expanded.

    Only the sequences matched by the module-level ``_escape_sequence``
    pattern are touched; each one is decoded individually with the
    ``unicode-escape`` codec, so every other character of the input
    (including non-ASCII text) is passed through unchanged.
    """
    return _escape_sequence.sub(
        lambda found: codecs.decode(found.group(0), 'unicode-escape'),
        string,
    )
58+
59+
60+
def is_surrounded_by(string, char):
    """Tell whether *string* both starts and ends with *char*.

    A string shorter than two characters is never considered surrounded,
    so a lone quote character does not count as an (empty) quoted value.
    """
    if len(string) < 2:
        return False
    first, last = string[0], string[-1]
    return first == last and last == char
65+
66+
67+
def parse_binding(string, position):
    """Parse one binding from *string*, starting at offset *position*.

    The module-level ``_binding`` pattern is applied at *position*; its
    groups are the whole matched text, the key and the value.

    Returns a ``(Binding, end)`` tuple where ``end`` is the offset just
    past the matched text. When either the key or the value is absent
    (e.g. a blank or comment-only line) the binding carries ``None`` for
    both, but ``original`` still holds the matched text so callers can
    reproduce the input verbatim.
    """
    found = _binding.match(string, position)
    original, key, raw_value = found.groups()
    end = found.end()

    # Not a complete key/value pair: report the raw text only.
    if key is None or raw_value is None:
        return (Binding(key=None, value=None, original=original), end)

    if is_surrounded_by(raw_value, "'") or is_surrounded_by(raw_value, '"'):
        # Quoted value: drop the quotes and expand backslash escapes.
        parsed = decode_escapes(raw_value[1:-1])
    else:
        # Unquoted value: surrounding whitespace is not significant.
        parsed = raw_value.strip()

    return (Binding(key=key, value=parsed, original=original), end)
80+
81+
82+
def parse_stream(stream):
    """Yield every ``Binding`` found in *stream*, in order.

    The stream is read in full up front, then ``parse_binding`` is applied
    repeatedly, each call resuming at the offset returned by the previous
    one, until the end of the text is reached.
    """
    text = stream.read()
    end = len(text)
    offset = 0
    while offset < end:
        binding, offset = parse_binding(text, offset)
        yield binding
4689

4790

4891
class DotEnv():
@@ -52,19 +95,17 @@ def __init__(self, dotenv_path, verbose=False):
5295
self._dict = None
5396
self.verbose = verbose
5497

98+
@contextmanager
5599
def _get_stream(self):
56-
self._is_file = False
57100
if isinstance(self.dotenv_path, StringIO):
58-
return self.dotenv_path
59-
60-
if os.path.isfile(self.dotenv_path):
61-
self._is_file = True
62-
return io.open(self.dotenv_path)
63-
64-
if self.verbose:
65-
warnings.warn("File doesn't exist {}".format(self.dotenv_path))
66-
67-
return StringIO('')
101+
yield self.dotenv_path
102+
elif os.path.isfile(self.dotenv_path):
103+
with io.open(self.dotenv_path) as stream:
104+
yield stream
105+
else:
106+
if self.verbose:
107+
warnings.warn("File doesn't exist {}".format(self.dotenv_path))
108+
yield StringIO('')
68109

69110
def dict(self):
70111
"""Return dotenv as dict"""
@@ -76,17 +117,10 @@ def dict(self):
76117
return self._dict
77118

78119
def parse(self):
79-
f = self._get_stream()
80-
81-
for line in f:
82-
key, value = parse_line(line)
83-
if not key:
84-
continue
85-
86-
yield key, value
87-
88-
if self._is_file:
89-
f.close()
120+
with self._get_stream() as stream:
121+
for mapping in parse_stream(stream):
122+
if mapping.key is not None and mapping.value is not None:
123+
yield mapping.key, mapping.value
90124

91125
def set_as_environment_variables(self, override=False):
92126
"""
@@ -126,6 +160,20 @@ def get_key(dotenv_path, key_to_get):
126160
return DotEnv(dotenv_path, verbose=True).get(key_to_get)
127161

128162

163+
@contextmanager
def rewrite(path):
    """Context manager for rewriting the file at *path* via a temporary file.

    Yields a ``(source, dest)`` pair: ``source`` is *path* opened for
    reading and ``dest`` is a named temporary file opened in ``"w+"`` mode.
    If the ``with`` body finishes without error, the temporary file is
    moved over *path*. If anything raises (including failing to open
    *path*), the temporary file is deleted, *path* is left as it was and
    the exception is re-raised.
    """
    # Track the temp file name separately: if creating the temporary file
    # itself fails, ``dest`` is never bound, and referencing it in the
    # handler below would replace the real error with an UnboundLocalError.
    dest_name = None
    try:
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as dest:
            dest_name = dest.name
            with io.open(path) as source:
                yield (source, dest)
    except BaseException:
        if dest_name is not None and os.path.isfile(dest_name):
            os.unlink(dest_name)
        raise
    else:
        # NOTE(review): the moved file presumably keeps the temporary
        # file's permissions rather than those of the original *path* --
        # confirm whether the original mode should be preserved.
        shutil.move(dest_name, path)
175+
176+
129177
def set_key(dotenv_path, key_to_set, value_to_set, quote_mode="always"):
130178
"""
131179
Adds or Updates a key/value to the given .env
@@ -141,20 +189,19 @@ def set_key(dotenv_path, key_to_set, value_to_set, quote_mode="always"):
141189
if " " in value_to_set:
142190
quote_mode = "always"
143191

144-
line_template = '{}="{}"' if quote_mode == "always" else '{}={}'
192+
line_template = '{}="{}"\n' if quote_mode == "always" else '{}={}\n'
145193
line_out = line_template.format(key_to_set, value_to_set)
146194

147-
replaced = False
148-
for line in fileinput.input(dotenv_path, inplace=True):
149-
k, v = parse_line(line)
150-
if k == key_to_set:
151-
replaced = True
152-
line = "{}\n".format(line_out)
153-
print(line, end='')
154-
155-
if not replaced:
156-
with io.open(dotenv_path, "a") as f:
157-
f.write("{}\n".format(line_out))
195+
with rewrite(dotenv_path) as (source, dest):
196+
replaced = False
197+
for mapping in parse_stream(source):
198+
if mapping.key == key_to_set:
199+
dest.write(line_out)
200+
replaced = True
201+
else:
202+
dest.write(mapping.original)
203+
if not replaced:
204+
dest.write(line_out)
158205

159206
return True, key_to_set, value_to_set
160207

@@ -166,18 +213,17 @@ def unset_key(dotenv_path, key_to_unset, quote_mode="always"):
166213
If the .env path given doesn't exist, fails
167214
If the given key doesn't exist in the .env, fails
168215
"""
169-
removed = False
170-
171216
if not os.path.exists(dotenv_path):
172217
warnings.warn("can't delete from %s - it doesn't exist." % dotenv_path)
173218
return None, key_to_unset
174219

175-
for line in fileinput.input(dotenv_path, inplace=True):
176-
k, v = parse_line(line)
177-
if k == key_to_unset:
178-
removed = True
179-
line = ''
180-
print(line, end='')
220+
removed = False
221+
with rewrite(dotenv_path) as (source, dest):
222+
for mapping in parse_stream(source):
223+
if mapping.key == key_to_unset:
224+
removed = True
225+
else:
226+
dest.write(mapping.original)
181227

182228
if not removed:
183229
warnings.warn("key %s not removed from %s - key doesn't exist." % (key_to_unset, dotenv_path))

tests/test_cli.py

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# -*- coding: utf-8 -*-
2-
from os import environ
2+
import os
33
from os.path import dirname, join
44

5+
import pytest
6+
import sh
7+
58
import dotenv
6-
from dotenv.version import __version__
79
from dotenv.cli import cli as dotenv_cli
8-
9-
import sh
10+
from dotenv.version import __version__
1011

1112
here = dirname(__file__)
1213
dotenv_path = join(here, '.env')
@@ -38,6 +39,22 @@ def test_set_key(dotenv_file):
3839
with open(dotenv_file, 'r') as fp:
3940
assert 'HELLO="WORLD 2"\nfoo="bar"' == fp.read().strip()
4041

42+
success, key_to_set, value_to_set = dotenv.set_key(dotenv_file, "HELLO", "WORLD\n3")
43+
44+
with open(dotenv_file, "r") as fp:
45+
assert 'HELLO="WORLD\n3"\nfoo="bar"' == fp.read().strip()
46+
47+
48+
def test_set_key_permission_error(dotenv_file):
    """set_key must raise on an unreadable file and leave it unmodified."""
    os.chmod(dotenv_file, 0o000)

    try:
        with pytest.raises(Exception):
            dotenv.set_key(dotenv_file, "HELLO", "WORLD")
    finally:
        # Always restore access, even when the expectation above fails,
        # so the file can still be read and cleaned up afterwards.
        os.chmod(dotenv_file, 0o600)

    with open(dotenv_file, "r") as fp:
        assert fp.read() == ""
57+
4158

4259
def test_list(cli, dotenv_file):
4360
success, key_to_set, value_to_set = dotenv.set_key(dotenv_file, 'HELLO', 'WORLD')
@@ -59,6 +76,13 @@ def test_list_wo_file(cli):
5976
assert 'Invalid value for "-f"' in result.output
6077

6178

79+
def test_empty_value():
    """A key followed by ``=`` and nothing else yields an empty string."""
    with open(dotenv_path, "w") as f:
        f.write("TEST=")
    try:
        assert dotenv.get_key(dotenv_path, "TEST") == ""
    finally:
        # Remove the file even on failure so it cannot leak into other tests.
        sh.rm(dotenv_path)
84+
85+
6286
def test_key_value_without_quotes():
6387
with open(dotenv_path, 'w') as f:
6488
f.write("TEST = value \n")
@@ -95,18 +119,41 @@ def test_value_with_special_characters():
95119
sh.rm(dotenv_path)
96120

97121

98-
def test_unset():
99-
sh.touch(dotenv_path)
100-
success, key_to_set, value_to_set = dotenv.set_key(dotenv_path, 'HELLO', 'WORLD')
101-
stored_value = dotenv.get_key(dotenv_path, 'HELLO')
102-
assert stored_value == 'WORLD'
103-
success, key_to_unset = dotenv.unset_key(dotenv_path, 'HELLO')
104-
assert success is True
105-
assert dotenv.get_key(dotenv_path, 'HELLO') is None
106-
success, key_to_unset = dotenv.unset_key(dotenv_path, 'RANDOM')
107-
assert success is None
122+
def test_value_with_new_lines():
    """Quoted values may contain a newline, with either quote style."""
    for content in ('TEST="a\nb"', "TEST='a\nb'"):
        with open(dotenv_path, 'w') as f:
            f.write(content)
        try:
            assert dotenv.get_key(dotenv_path, 'TEST') == "a\nb"
        finally:
            # Remove the file even on failure so it cannot leak into
            # the next case or into other tests.
            sh.rm(dotenv_path)
132+
133+
134+
def test_value_after_comment():
    """A binding on the line after a comment line is still parsed."""
    with open(dotenv_path, "w") as f:
        f.write("# comment\nTEST=a")
    try:
        assert dotenv.get_key(dotenv_path, "TEST") == "a"
    finally:
        # Remove the file even on failure so it cannot leak into other tests.
        sh.rm(dotenv_path)
109-
success, key_to_unset = dotenv.unset_key(dotenv_path, 'HELLO')
139+
140+
141+
def test_unset_ok(dotenv_file):
    """unset_key removes only the requested binding and reports success."""
    with open(dotenv_file, "w") as env:
        env.write("a=b\nc=d")

    result = dotenv.unset_key(dotenv_file, "a")
    success, key_to_unset = result

    assert success is True
    assert key_to_unset == "a"
    with open(dotenv_file, "r") as env:
        assert env.read() == "c=d"
    sh.rm(dotenv_file)
152+
153+
154+
def test_unset_non_existing_file():
    """unset_key reports ``None`` as its status when the file is missing."""
    result = dotenv.unset_key('/non-existing', 'HELLO')
    success, key_to_unset = result

    assert success is None
111158

112159

@@ -180,7 +227,7 @@ def test_get_key_with_interpolation(cli):
180227
stored_value = dotenv.get_key(dotenv_path, 'BAR')
181228
assert stored_value == 'CONCATENATED_WORLD_POSIX_VAR'
182229
# test replace from environ taking precedence over file
183-
environ["HELLO"] = "TAKES_PRECEDENCE"
230+
os.environ["HELLO"] = "TAKES_PRECEDENCE"
184231
stored_value = dotenv.get_key(dotenv_path, 'FOO')
185232
assert stored_value == "TAKES_PRECEDENCE"
186233
sh.rm(dotenv_path)
@@ -194,10 +241,10 @@ def test_get_key_with_interpolation_of_unset_variable(cli):
194241
stored_value = dotenv.get_key(dotenv_path, 'FOO')
195242
assert stored_value == ''
196243
# unless present in environment
197-
environ['NOT_SET'] = 'BAR'
244+
os.environ['NOT_SET'] = 'BAR'
198245
stored_value = dotenv.get_key(dotenv_path, 'FOO')
199246
assert stored_value == 'BAR'
200-
del(environ['NOT_SET'])
247+
del(os.environ['NOT_SET'])
201248
sh.rm(dotenv_path)
202249

203250

0 commit comments

Comments
 (0)