Skip to content

Commit f1abb22

Browse files
author
amaury.forgeotdarc
committed
Issues #2384 and #3975: Tracebacks were not correctly printed when the source file
contains a ``coding:`` header: the wrong line was displayed, and the encoding was not respected. Patch by Victor Stinner. git-svn-id: http://svn.python.org/projects/python/branches/py3k@66867 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 6183132 commit f1abb22

4 files changed

Lines changed: 218 additions & 81 deletions

File tree

Lib/test/test_traceback.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import unittest
77
import re
88
from test.support import run_unittest, is_jython, Error, captured_output
9+
from test.support import TESTFN, unlink
910

1011
import traceback
1112

@@ -90,6 +91,70 @@ def test_without_exception(self):
9091
err = traceback.format_exception_only(None, None)
9192
self.assertEqual(err, ['None\n'])
9293

94+
def test_encoded_file(self):
    """Check tracebacks printed for source files with a ``coding:`` header.

    For several charsets a small script is written to TESTFN, run in a
    subprocess, and its combined stdout/stderr is inspected to verify:

    - the reported line number is correct (Issue2384);
    - the source line and the error message respect the file encoding
      (Issue3975).
    """
    import subprocess
    import sys

    # The spawned subprocess has its stdout redirected to a PIPE, and its
    # encoding may be different from the current interpreter, on Windows
    # at least.
    process = subprocess.Popen([sys.executable, "-c",
                                "import sys; print(sys.stdout.encoding)"],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    stdout, _ = process.communicate()
    output_encoding = str(stdout, 'ascii').splitlines()[0]

    def do_test(firstlines, message, charset, lineno):
        # Raise the message in a subprocess, and catch the output.
        try:
            # The context manager closes the file even if write() fails
            # (e.g. the message cannot be encoded with `charset`).
            with open(TESTFN, "w", encoding=charset) as output:
                # The script body is spelled with explicit escapes so that
                # its indentation does not depend on this file's layout.
                output.write(
                    "{0}if 1:\n"
                    "    import traceback;\n"
                    "    raise RuntimeError('{1}')\n".format(firstlines,
                                                           message))
            process = subprocess.Popen([sys.executable, TESTFN],
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)
            stdout, _ = process.communicate()
            stdout = stdout.decode(output_encoding).splitlines()
        finally:
            unlink(TESTFN)

        # The source lines are encoded with the 'backslashreplace' handler
        encoded_message = message.encode(output_encoding,
                                         'backslashreplace')
        # and we just decoded them with the output_encoding.
        message_ascii = encoded_message.decode(output_encoding)

        err_line = "raise RuntimeError('{0}')".format(message_ascii)
        err_msg = "RuntimeError: {0}".format(message_ascii)

        # Fail with a readable message rather than an IndexError when the
        # child did not print a complete traceback.
        self.assertTrue(len(stdout) >= 4,
            "Unexpected traceback output: {0!r}".format(stdout))
        self.assertTrue(("line %s" % lineno) in stdout[1],
            "Invalid line number: {0!r} instead of {1}".format(
                stdout[1], lineno))
        self.assertTrue(stdout[2].endswith(err_line),
            "Invalid traceback line: {0!r} instead of {1!r}".format(
                stdout[2], err_line))
        self.assertTrue(stdout[3] == err_msg,
            "Invalid error message: {0!r} instead of {1!r}".format(
                stdout[3], err_msg))

    # No coding header at all: the raise statement is on line 3.
    do_test("", "foo", "ascii", 3)
    for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
        if charset == "ascii":
            text = "foo"
        elif charset == "GBK":
            text = "\u4E02\u5100"
        else:
            text = "h\xe9 ho"
        # Each extra header line shifts the raise statement down by one.
        do_test("# coding: {0}\n".format(charset),
                text, charset, 4)
        do_test("#!shebang\n# coding: {0}\n".format(charset),
                text, charset, 5)
157+
93158

94159
class TracebackFormatTests(unittest.TestCase):
95160

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ What's New in Python 3.0 beta 5
1515
Core and Builtins
1616
-----------------
1717

18+
- Issues #2384 and #3975: Tracebacks were not correctly printed when the
19+
source file contains a ``coding:`` header: the wrong line was displayed, and
20+
the encoding was not respected.
21+
1822
- Issue #3740: Null-initialize module state.
1923

2024
- Issue #3946: PyObject_CheckReadBuffer crashed on a memoryview object.

Parser/tokenizer.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,14 @@ fp_setreadl(struct tok_state *tok, const char* enc)
461461
readline = PyObject_GetAttrString(stream, "readline");
462462
tok->decoding_readline = readline;
463463

464+
/* The file has been reopened; parsing will restart from
465+
* the beginning of the file, we have to reset the line number.
466+
* But this function has been called from inside tok_nextc() which
467+
* will increment lineno before it returns. So we set it to -1 so that
468+
* the next call to tok_nextc() will start with tok->lineno == 0.
469+
*/
470+
tok->lineno = -1;
471+
464472
cleanup:
465473
Py_XDECREF(stream);
466474
Py_XDECREF(io);

Python/traceback.c

Lines changed: 141 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@
88
#include "structmember.h"
99
#include "osdefs.h"
1010
#include "traceback.h"
11+
#ifdef HAVE_FCNTL_H
12+
#include <fcntl.h>
13+
#endif
1114

1215
#define OFF(x) offsetof(PyTracebackObject, x)
1316

17+
/* Method from Parser/tokenizer.c */
18+
extern char * PyTokenizer_FindEncoding(int);
19+
1420
static PyObject *
1521
tb_dir(PyTracebackObject *self)
1622
{
@@ -128,102 +134,156 @@ PyTraceBack_Here(PyFrameObject *frame)
128134
return 0;
129135
}
130136

137+
static int
138+
_Py_FindSourceFile(const char* filename, char* namebuf, size_t namelen, int open_flags)
139+
{
140+
int i;
141+
int fd = -1;
142+
PyObject *v;
143+
Py_ssize_t _npath;
144+
int npath;
145+
size_t taillen;
146+
PyObject *syspath;
147+
const char* path;
148+
const char* tail;
149+
Py_ssize_t len;
150+
151+
/* Search tail of filename in sys.path before giving up */
152+
tail = strrchr(filename, SEP);
153+
if (tail == NULL)
154+
tail = filename;
155+
else
156+
tail++;
157+
taillen = strlen(tail);
158+
159+
syspath = PySys_GetObject("path");
160+
if (syspath == NULL || !PyList_Check(syspath))
161+
return -1;
162+
_npath = PyList_Size(syspath);
163+
npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
164+
165+
for (i = 0; i < npath; i++) {
166+
v = PyList_GetItem(syspath, i);
167+
if (v == NULL) {
168+
PyErr_Clear();
169+
break;
170+
}
171+
if (!PyUnicode_Check(v))
172+
continue;
173+
path = _PyUnicode_AsStringAndSize(v, &len);
174+
if (len + 1 + taillen >= (Py_ssize_t)namelen - 1)
175+
continue; /* Too long */
176+
strcpy(namebuf, path);
177+
if (strlen(namebuf) != len)
178+
continue; /* v contains '\0' */
179+
if (len > 0 && namebuf[len-1] != SEP)
180+
namebuf[len++] = SEP;
181+
strcpy(namebuf+len, tail);
182+
Py_BEGIN_ALLOW_THREADS
183+
fd = open(namebuf, open_flags);
184+
Py_END_ALLOW_THREADS
185+
if (0 <= fd) {
186+
return fd;
187+
}
188+
}
189+
return -1;
190+
}
191+
131192
int
132193
_Py_DisplaySourceLine(PyObject *f, const char *filename, int lineno, int indent)
133194
{
134195
int err = 0;
135-
FILE *xfp = NULL;
136-
char linebuf[2000];
196+
int fd;
137197
int i;
138-
char namebuf[MAXPATHLEN+1];
198+
char *found_encoding;
199+
char *encoding;
200+
PyObject *fob = NULL;
201+
PyObject *lineobj = NULL;
202+
#ifdef O_BINARY
203+
const int open_flags = O_RDONLY | O_BINARY; /* necessary for Windows */
204+
#else
205+
const int open_flags = O_RDONLY;
206+
#endif
207+
char buf[MAXPATHLEN+1];
208+
Py_UNICODE *u, *p;
209+
Py_ssize_t len;
139210

211+
/* open the file */
140212
if (filename == NULL)
141-
return -1;
142-
xfp = fopen(filename, "r" PY_STDIOTEXTMODE);
143-
if (xfp == NULL) {
144-
/* Search tail of filename in sys.path before giving up */
145-
PyObject *path;
146-
const char *tail = strrchr(filename, SEP);
147-
if (tail == NULL)
148-
tail = filename;
149-
else
150-
tail++;
151-
path = PySys_GetObject("path");
152-
if (path != NULL && PyList_Check(path)) {
153-
Py_ssize_t _npath = PyList_Size(path);
154-
int npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
155-
size_t taillen = strlen(tail);
156-
for (i = 0; i < npath; i++) {
157-
PyObject *v = PyList_GetItem(path, i);
158-
if (v == NULL) {
159-
PyErr_Clear();
160-
break;
161-
}
162-
if (PyBytes_Check(v)) {
163-
size_t len;
164-
len = PyBytes_GET_SIZE(v);
165-
if (len + 1 + taillen >= MAXPATHLEN)
166-
continue; /* Too long */
167-
strcpy(namebuf, PyBytes_AsString(v));
168-
if (strlen(namebuf) != len)
169-
continue; /* v contains '\0' */
170-
if (len > 0 && namebuf[len-1] != SEP)
171-
namebuf[len++] = SEP;
172-
strcpy(namebuf+len, tail);
173-
xfp = fopen(namebuf, "r" PY_STDIOTEXTMODE);
174-
if (xfp != NULL) {
175-
filename = namebuf;
176-
break;
177-
}
178-
}
179-
}
180-
}
213+
return 0;
214+
Py_BEGIN_ALLOW_THREADS
215+
fd = open(filename, open_flags);
216+
Py_END_ALLOW_THREADS
217+
if (fd < 0) {
218+
fd = _Py_FindSourceFile(filename, buf, sizeof(buf), open_flags);
219+
if (fd < 0)
220+
return 0;
221+
filename = buf;
181222
}
182223

183-
if (xfp == NULL)
184-
return err;
185-
if (err != 0) {
186-
fclose(xfp);
187-
return err;
188-
}
224+
/* use the right encoding to decode the file as unicode */
225+
found_encoding = PyTokenizer_FindEncoding(fd);
226+
encoding = (found_encoding != NULL) ? found_encoding :
227+
(char*)PyUnicode_GetDefaultEncoding();
228+
lseek(fd, 0, 0); /* Reset position */
229+
fob = PyFile_FromFd(fd, (char*)filename, "r", -1, (char*)encoding,
230+
NULL, NULL, 1);
231+
PyMem_FREE(found_encoding);
232+
if (fob == NULL) {
233+
PyErr_Clear();
234+
close(fd);
235+
return 0;
236+
}
189237

238+
/* get the line number lineno */
190239
for (i = 0; i < lineno; i++) {
191-
char* pLastChar = &linebuf[sizeof(linebuf)-2];
192-
do {
193-
*pLastChar = '\0';
194-
if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, xfp, NULL) == NULL)
195-
break;
196-
/* fgets read *something*; if it didn't get as
197-
far as pLastChar, it must have found a newline
198-
or hit the end of the file; if pLastChar is \n,
199-
it obviously found a newline; else we haven't
200-
yet seen a newline, so must continue */
201-
} while (*pLastChar != '\0' && *pLastChar != '\n');
240+
Py_XDECREF(lineobj);
241+
lineobj = PyFile_GetLine(fob, -1);
242+
if (!lineobj) {
243+
err = -1;
244+
break;
245+
}
202246
}
203-
if (i == lineno) {
204-
char buf[11];
205-
char *p = linebuf;
206-
while (*p == ' ' || *p == '\t' || *p == '\014')
207-
p++;
208-
209-
/* Write some spaces before the line */
210-
strcpy(buf, " ");
211-
assert (strlen(buf) == 10);
212-
while (indent > 0) {
213-
if(indent < 10)
214-
buf[indent] = '\0';
215-
err = PyFile_WriteString(buf, f);
216-
if (err != 0)
217-
break;
218-
indent -= 10;
247+
Py_DECREF(fob);
248+
if (!lineobj || !PyUnicode_Check(lineobj)) {
249+
Py_XDECREF(lineobj);
250+
return err;
251+
}
252+
253+
/* remove the indentation of the line */
254+
u = PyUnicode_AS_UNICODE(lineobj);
255+
len = PyUnicode_GET_SIZE(lineobj);
256+
for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
257+
len--;
258+
if (u != p) {
259+
PyObject *truncated;
260+
truncated = PyUnicode_FromUnicode(p, len);
261+
if (truncated) {
262+
Py_DECREF(lineobj);
263+
lineobj = truncated;
264+
} else {
265+
PyErr_Clear();
219266
}
267+
}
220268

221-
if (err == 0)
222-
err = PyFile_WriteString(p, f);
223-
if (err == 0 && strchr(p, '\n') == NULL)
224-
err = PyFile_WriteString("\n", f);
269+
/* Write some spaces before the line */
270+
strcpy(buf, " ");
271+
assert (strlen(buf) == 10);
272+
while (indent > 0) {
273+
if(indent < 10)
274+
buf[indent] = '\0';
275+
err = PyFile_WriteString(buf, f);
276+
if (err != 0)
277+
break;
278+
indent -= 10;
225279
}
226-
fclose(xfp);
280+
281+
/* finally display the line */
282+
if (err == 0)
283+
err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
284+
Py_DECREF(lineobj);
285+
if (err == 0)
286+
err = PyFile_WriteString("\n", f);
227287
return err;
228288
}
229289

0 commit comments

Comments
 (0)