Skip to content

Commit 61dfb22

Browse files
authored
Merge pull request #439 from AlekMosingiewicz/handle-bom-in-script
Handle BOM in the beginning of the script
2 parents 7d5dda2 + b3f77f0 commit 61dfb22

5 files changed

Lines changed: 58 additions & 1 deletion

File tree

include/chaiscript/language/chaiscript_engine.hpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,27 @@ namespace chaiscript
204204
m_engine.add(fun([this](const std::string& t_namespace_name) { import(t_namespace_name); }), "import");
205205
}
206206

207+
/// Skip a UTF-8 byte order mark (BOM) at the beginning of a file.
///
/// Reads up to the first three bytes of the stream and compares them with the
/// UTF-8 BOM byte sequence EF BB BF. On return the stream is positioned right
/// after the BOM when one was found, and at the beginning of the file otherwise.
///
/// \param infile input file stream to inspect; the bytes are read from its current position
/// \return true if a BOM was found (and skipped), false otherwise
static bool skip_bom(std::ifstream &infile) {
  constexpr std::streamsize bom_size = 3;
  char buffer[bom_size] = {};

  infile.read(buffer, bom_size);

  // Only a full three-byte read can contain a BOM.
  const bool has_bom = (infile.gcount() == bom_size)
                    && (buffer[0] == '\xef')
                    && (buffer[1] == '\xbb')
                    && (buffer[2] == '\xbf');

  // A file shorter than three bytes makes read() set eofbit and failbit.
  // seekg() does nothing while failbit is set, so the stream would stay unusable
  // for the caller. Drop the EOF-induced flags, but keep badbit so that genuine
  // I/O errors are still reported.
  if (infile.eof()) {
    infile.clear(infile.rdstate() & std::ios::badbit);
  }

  infile.seekg(has_bom ? bom_size : 0, std::ios::beg);

  return has_bom;
}
207228

208229
/// Helper function for loading a file
209230
static std::string load_file(const std::string &t_filename) {
@@ -213,11 +234,16 @@ namespace chaiscript
213234
throw chaiscript::exception::file_not_found_error(t_filename);
214235
}
215236

216-
const auto size = infile.tellg();
237+
auto size = infile.tellg();
217238
infile.seekg(0, std::ios::beg);
218239

219240
assert(size >= 0);
220241

242+
if (skip_bom(infile)) {
243+
size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors
244+
assert(size >=0 ); //and check if there's more text
245+
}
246+
221247
if (size == std::streampos(0))
222248
{
223249
return std::string();

include/chaiscript/language/chaiscript_parser.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,10 +542,14 @@ namespace chaiscript
542542

543543
/// Skips ChaiScript whitespace, which means space and tab, but not cr/lf
544544
/// jespada: Modified SkipWS to optionally skip LF ('\n') and/or CR+LF ("\r\n")
545+
/// AlekMosingiewicz: Added an exception when an illegal character (any byte above 0x7e) is detected
545546
bool SkipWS(bool skip_cr=false) {
546547
bool retval = false;
547548

548549
while (m_position.has_more()) {
550+
if(static_cast<unsigned char>(*m_position) > 0x7e) {
551+
throw exception::eval_error("Illegal character", File_Position(m_position.line, m_position.col), *m_filename);
552+
}
549553
auto end_line = (*m_position != 0) && ((*m_position == '\n') || (*m_position == '\r' && *(m_position+1) == '\n'));
550554

551555
if ( char_in_alphabet(*m_position,detail::white_alphabet) || (skip_cr && end_line)) {

unittests/compiled_tests.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,29 @@ TEST_CASE("Functor cast")
352352
CHECK(d == 3 * 6);
353353
}
354354

355+
// Evaluating a script with a non-ASCII byte inside an identifier must raise eval_error.
TEST_CASE("Non-ASCII characters in the middle of string")
{
  // Byte 0xEF is placed between "prin" and "t".
  const std::string script = "prin\xeft \"Hello World\"";

  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(engine.eval<std::string>(script), chaiscript::exception::eval_error);
}
360+
361+
// Evaluating a script whose first byte is non-ASCII must raise eval_error.
TEST_CASE("Non-ASCII characters in the beginning of string")
{
  // Byte 0xEF precedes the otherwise valid statement.
  const std::string script = "\xefprint \"Hello World\"";

  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(engine.eval<std::string>(script), chaiscript::exception::eval_error);
}
355366

367+
// Evaluating a script whose last byte is non-ASCII must raise eval_error.
TEST_CASE("Non-ASCII characters in the end of string")
{
  // Byte 0xEF follows the otherwise valid statement.
  const std::string script = "print \"Hello World\"\xef";

  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(engine.eval<std::string>(script), chaiscript::exception::eval_error);
}
372+
373+
// A UTF-8 BOM (EF BB BF) at the start of a string passed to eval() is expected
// to raise eval_error.
TEST_CASE("BOM in string")
{
  // The three BOM bytes come directly before the statement.
  const std::string script = "\xef\xbb\xbfprint \"Hello World\"";

  chaiscript::ChaiScript_Basic engine(create_chaiscript_stdlib(), create_chaiscript_parser());

  CHECK_THROWS_AS(engine.eval<std::string>(script), chaiscript::exception::eval_error);
}
356378

357379
int set_state_test_myfun()
358380
{

unittests/eval_file_with_bom.chai

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
eval_file("file_with_bom.inc") // evaluate the companion fixture, which defines alwaysTrue()
2+
assert_true(alwaysTrue()) // alwaysTrue() only exists if the file above was evaluated

unittests/file_with_bom.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def alwaysTrue() { // fixture function called by eval_file_with_bom.chai; NOTE(review): file name suggests a leading UTF-8 BOM - confirm
2+
return true
3+
}

0 commit comments

Comments
 (0)