Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/pr-format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ jobs:
- name: Run generate_opcode_metadata.py
run: python scripts/generate_opcode_metadata.py

- name: Check for formatting changes
run: |
if ! git diff --exit-code; then
echo "::error::Formatting changes detected. Please run 'cargo fmt --all', 'ruff format', and 'ruff check --select I --fix' locally."
exit 1
fi

- name: Post formatting suggestions
if: failure()
uses: reviewdog/action-suggester@v1
with:
tool_name: auto-format
Expand Down
84 changes: 46 additions & 38 deletions crates/stdlib/src/_tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ mod _tokenize {
types::{Constructor, IterNext, Iterable, SelfIter},
},
};
use core::fmt;
use ruff_python_ast::PySourceType;
use ruff_python_ast::token::{Token, TokenKind};
use ruff_python_parser::{
LexicalErrorType, ParseError, ParseErrorType, parse_unchecked_source,
};
use ruff_source_file::{LineIndex, LineRanges};
use ruff_text_size::{Ranged, TextSize};
use core::fmt;

const TOKEN_ENDMARKER: u8 = 0;
const TOKEN_DEDENT: u8 = 6;
Expand Down Expand Up @@ -114,8 +114,7 @@ mod _tokenize {
let line = zelf.readline(vm)?;
if line.is_empty() {
let accumulated = core::mem::take(source);
let parsed =
parse_unchecked_source(&accumulated, PySourceType::Python);
let parsed = parse_unchecked_source(&accumulated, PySourceType::Python);
let tokens: Vec<Token> = parsed.tokens().iter().copied().collect();
let errors: Vec<ParseError> = parsed.errors().to_vec();
let line_index = LineIndex::from_source_text(&accumulated);
Expand All @@ -135,8 +134,7 @@ mod _tokenize {
}
}
TokenizerPhase::Yielding { .. } => {
let result =
emit_next_token(&mut state, zelf.extra_tokens, vm)?;
let result = emit_next_token(&mut state, zelf.extra_tokens, vm)?;
*zelf.state.write() = state;
return Ok(result);
}
Expand Down Expand Up @@ -191,10 +189,16 @@ mod _tokenize {
.take(sl.saturating_sub(1))
.map(|l| l.len() + 1)
.sum();
let full_line =
source.full_line_str(TextSize::from(offset.min(source.len()) as u32));
let full_line = source.full_line_str(TextSize::from(offset.min(source.len()) as u32));
return Ok(PyIterReturn::Return(make_token_tuple(
vm, tok_type, &tok_str, sl, sc as isize, el, ec as isize, full_line,
vm,
tok_type,
&tok_str,
sl,
sc as isize,
el,
ec as isize,
full_line,
)));
}

Expand All @@ -218,9 +222,7 @@ mod _tokenize {
) {
continue;
}
if err.location.start() <= range.start()
&& range.start() < err.location.end()
{
if err.location.start() <= range.start() && range.start() < err.location.end() {
return Err(raise_indentation_error(vm, err, source, line_index));
}
}
Expand All @@ -230,15 +232,12 @@ mod _tokenize {
continue;
}

if !extra_tokens
&& matches!(kind, TokenKind::Comment | TokenKind::NonLogicalNewline)
{
if !extra_tokens && matches!(kind, TokenKind::Comment | TokenKind::NonLogicalNewline) {
continue;
}

let raw_type = token_kind_value(kind);
let token_type = if extra_tokens && raw_type > TOKEN_DEDENT && raw_type < TOKEN_OP
{
let token_type = if extra_tokens && raw_type > TOKEN_DEDENT && raw_type < TOKEN_OP {
TOKEN_OP
} else {
raw_type
Expand Down Expand Up @@ -294,15 +293,21 @@ mod _tokenize {
&& (token_str.contains("{{") || token_str.contains("}}"))
{
let mut parts =
split_fstring_middle(token_str, token_type, start_line, start_col)
.into_iter();
split_fstring_middle(token_str, token_type, start_line, start_col).into_iter();
let (tt, ts, sl, sc, el, ec) = parts.next().unwrap();
let rest: Vec<_> = parts.collect();
for p in rest.into_iter().rev() {
pending_fstring_parts.push(p);
}
return Ok(PyIterReturn::Return(make_token_tuple(
vm, tt, &ts, sl, sc as isize, el, ec as isize, line_str,
vm,
tt,
&ts,
sl,
sc as isize,
el,
ec as isize,
line_str,
)));
}

Expand All @@ -315,17 +320,19 @@ mod _tokenize {
.is_some_and(|t| t.kind() == TokenKind::Rbrace)
{
let mid_type = find_fstring_middle_type(tokens, *index);
*pending_empty_fstring_middle = Some((
mid_type,
end_line,
end_col,
line_str.to_string(),
));
*pending_empty_fstring_middle =
Some((mid_type, end_line, end_col, line_str.to_string()));
}

return Ok(PyIterReturn::Return(make_token_tuple(
vm, token_type, token_str, start_line, start_col as isize, end_line,
end_col as isize, line_str,
vm,
token_type,
token_str,
start_line,
start_col as isize,
end_line,
end_col as isize,
line_str,
)));
}

Expand Down Expand Up @@ -380,14 +387,20 @@ mod _tokenize {
let (em_line, em_col, em_line_str): (usize, isize, &str) = if extra_tokens {
(last_line + 1, 0, "")
} else {
let last_line_text = source.full_line_str(TextSize::from(
source.len().saturating_sub(1) as u32,
));
let last_line_text =
source.full_line_str(TextSize::from(source.len().saturating_sub(1) as u32));
(last_line, -1, last_line_text)
};

let result = make_token_tuple(
vm, TOKEN_ENDMARKER, "", em_line, em_col, em_line, em_col, em_line_str,
vm,
TOKEN_ENDMARKER,
"",
em_line,
em_col,
em_line,
em_col,
em_line_str,
);
state.phase = TokenizerPhase::Done;
Ok(PyIterReturn::Return(result))
Expand Down Expand Up @@ -448,10 +461,7 @@ mod _tokenize {
lineno: usize,
offset: usize,
) -> rustpython_vm::builtins::PyBaseExceptionRef {
let exc = vm.new_exception_msg(
vm.ctx.exceptions.syntax_error.to_owned(),
msg.into(),
);
let exc = vm.new_exception_msg(vm.ctx.exceptions.syntax_error.to_owned(), msg.into());
let obj = exc.as_object();
let _ = obj.set_attr("msg", vm.ctx.new_str(msg), vm);
let _ = obj.set_attr("lineno", vm.ctx.new_int(lineno), vm);
Expand Down Expand Up @@ -739,9 +749,7 @@ mod _tokenize {
TokenKind::TStringStart => 62,
TokenKind::TStringMiddle => 63,
TokenKind::TStringEnd => 64,
TokenKind::IpyEscapeCommand
| TokenKind::Question
| TokenKind::Unknown => 67, // ERRORTOKEN
TokenKind::IpyEscapeCommand | TokenKind::Question | TokenKind::Unknown => 67, // ERRORTOKEN
}
}
}
4 changes: 2 additions & 2 deletions crates/stdlib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ mod json;
mod locale;

mod _opcode;
#[path = "_tokenize.rs"]
mod _tokenize;
mod math;
#[cfg(all(feature = "host_env", any(unix, windows)))]
mod mmap;
Expand All @@ -49,8 +51,6 @@ mod pystruct;
mod random;
mod statistics;
mod suggestions;
#[path = "_tokenize.rs"]
mod _tokenize;
// TODO: maybe make this an extension module, if we ever get those
// mod re;
#[cfg(all(feature = "host_env", not(target_arch = "wasm32")))]
Expand Down
Loading