Skip to content

Commit b1ea204

Browse files
xingyaowwneubig
andauthored
Migrate multi-line-bash-related sandbox tests into runtime tests and fix multi-line issue (OpenHands#3128)
* Remove global config from memory * Remove runtime global config * Remove from storage * Remove global config * Fix event stream tests * Fix sandbox issue * Change config * Removed transferred tests * Add swe env box * Fixes on testing * Fixed some tests * Merge with stashed changes * Fix typing * Fix ipython test * Revive function * Make temp_dir fixture * Remove test to avoid circular import * fix eventstream filestore for test_runtime * fix parse arg issue that cause integration test to fail * support swebench pull from custom namespace * add back simple tests for runtime * move multi-line bash tests to test_runtime; support multi-line bash for esruntime; * add testcase to handle PS2 prompt * use bashlex for bash parsing to handle multi-line commands; add testcases for multi-line commands * revert ghcr runtime change --------- Co-authored-by: Graham Neubig <[email protected]>
1 parent 8b77e8a commit b1ea204

11 files changed

Lines changed: 612 additions & 154 deletions

File tree

opendevin/core/logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from termcolor import colored
1010

1111
DISABLE_COLOR_PRINTING = False
12-
DEBUG = False
12+
DEBUG = os.getenv('DEBUG', 'False').lower() in ['true', '1', 'yes']
1313

1414
ColorType = Literal[
1515
'red',

opendevin/runtime/client/client.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
Plugin,
4747
)
4848
from opendevin.runtime.server.files import insert_lines, read_lines
49+
from opendevin.runtime.utils import split_bash_commands
4950

5051
app = FastAPI()
5152

@@ -79,14 +80,23 @@ def _init_bash_shell(self, work_dir: str) -> None:
7980
r'\[PEXPECT_BEGIN\] ([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]'
8081
)
8182

82-
self.shell.sendline(f'export PS1="{self.__bash_PS1}"')
83+
self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
8384
self.shell.expect(self.__bash_expect_regex)
8485

8586
self.shell.sendline(f'cd {work_dir}')
8687
self.shell.expect(self.__bash_expect_regex)
8788

8889
def _get_bash_prompt(self):
8990
ps1 = self.shell.after
91+
92+
# begin at the last occurence of '[PEXPECT_BEGIN]'.
93+
# In multi-line bash commands, the prompt will be repeated
94+
# and the matched regex captures all of them
95+
# - we only want the last one (newest prompt)
96+
_begin_pos = ps1.rfind('[PEXPECT_BEGIN]')
97+
if _begin_pos != -1:
98+
ps1 = ps1[_begin_pos:]
99+
90100
# parse the ps1 to get username, hostname, and working directory
91101
matched = re.match(self.__bash_expect_regex, ps1)
92102
assert (
@@ -102,7 +112,7 @@ def _get_bash_prompt(self):
102112
prompt += '$'
103113
return prompt + ' '
104114

105-
def _execute_bash(self, command, keep_prompt: bool = True) -> tuple[str, int]:
115+
def _execute_bash(self, command: str, keep_prompt: bool = True) -> tuple[str, int]:
106116
logger.debug(f'Executing command: {command}')
107117
self.shell.sendline(command)
108118
self.shell.expect(self.__bash_expect_regex)
@@ -129,10 +139,22 @@ async def run_action(self, action) -> Observation:
129139

130140
async def run(self, action: CmdRunAction) -> CmdOutputObservation:
131141
try:
132-
output, exit_code = self._execute_bash(action.command)
142+
commands = split_bash_commands(action.command)
143+
all_output = ''
144+
for command in commands:
145+
output, exit_code = self._execute_bash(command)
146+
if all_output:
147+
# previous output already exists with prompt "user@hostname:working_dir #""
148+
# we need to add the command to the previous output,
149+
# so model knows the following is the output of another action)
150+
all_output = all_output.rstrip() + ' ' + command + '\r\n'
151+
152+
all_output += str(output) + '\r\n'
153+
if exit_code != 0:
154+
break
133155
return CmdOutputObservation(
134156
command_id=-1,
135-
content=str(output),
157+
content=all_output.rstrip('\r\n'),
136158
command=action.command,
137159
exit_code=exit_code,
138160
)

opendevin/runtime/client/runtime.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(
5858
# TODO: We can switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker
5959
self.docker_client: docker.DockerClient = self._init_docker_client()
6060
self.container_image = (
61-
config.sandbox.container_image
61+
self.config.sandbox.container_image
6262
if container_image is None
6363
else container_image
6464
)
@@ -103,7 +103,7 @@ def _init_docker_client() -> docker.DockerClient:
103103
async def _init_container(
104104
self,
105105
sandbox_workspace_dir: str,
106-
mount_dir: str,
106+
mount_dir: str | None = None,
107107
plugins: list[PluginRequirement] | None = None,
108108
):
109109
try:
@@ -124,6 +124,14 @@ async def _init_container(
124124
else:
125125
port_mapping = {f'{self._port}/tcp': self._port}
126126

127+
if mount_dir is not None:
128+
volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
129+
else:
130+
logger.warn(
131+
'Mount dir is not set, will not mount the workspace directory to the container.'
132+
)
133+
volumes = None
134+
127135
container = self.docker_client.containers.run(
128136
self.container_image,
129137
command=(
@@ -139,7 +147,7 @@ async def _init_container(
139147
name=self.container_name,
140148
detach=True,
141149
environment={'DEBUG': 'true'} if self.config.debug else None,
142-
volumes={mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}},
150+
volumes=volumes,
143151
)
144152
logger.info(f'Container started. Server url: {self.api_url}')
145153
return container

opendevin/runtime/runtime.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@
3333
from opendevin.storage import FileStore
3434

3535

36-
def _default_env_vars(config: SandboxConfig) -> dict[str, str]:
36+
def _default_env_vars(sandbox_config: SandboxConfig) -> dict[str, str]:
3737
ret = {}
3838
for key in os.environ:
3939
if key.startswith('SANDBOX_ENV_'):
4040
sandbox_key = key.removeprefix('SANDBOX_ENV_')
4141
ret[sandbox_key] = os.environ[key]
42-
if config.enable_auto_lint:
42+
if sandbox_config.enable_auto_lint:
4343
ret['ENABLE_AUTO_LINT'] = 'true'
4444
return ret
4545

opendevin/runtime/server/runtime.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ async def run(self, action: CmdRunAction) -> Observation:
115115

116116
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
117117
self._run_command(
118-
("cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{action.code}\n' 'EOL'),
118+
f"cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n{action.code}\nEOL"
119119
)
120120

121121
# run the code

opendevin/runtime/utils/bash.py

Lines changed: 45 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,49 @@
1-
def split_bash_commands(commands):
2-
# States
3-
NORMAL = 0
4-
IN_SINGLE_QUOTE = 1
5-
IN_DOUBLE_QUOTE = 2
6-
IN_HEREDOC = 3
7-
8-
state = NORMAL
9-
heredoc_trigger = None
10-
result = []
11-
current_command: list[str] = []
12-
13-
i = 0
14-
while i < len(commands):
15-
char = commands[i]
16-
17-
if state == NORMAL:
18-
if char == "'":
19-
state = IN_SINGLE_QUOTE
20-
elif char == '"':
21-
state = IN_DOUBLE_QUOTE
22-
elif char == '\\':
23-
# Check if this is escaping a newline
24-
if i + 1 < len(commands) and commands[i + 1] == '\n':
25-
i += 1 # Skip the newline
26-
# Continue with the next line as part of the same command
27-
i += 1 # Move to the first character of the next line
28-
continue
29-
elif char == '\n':
30-
if not heredoc_trigger and current_command:
31-
result.append(''.join(current_command).strip())
32-
current_command = []
33-
elif char == '<' and commands[i : i + 2] == '<<':
34-
# Detect heredoc
35-
state = IN_HEREDOC
36-
i += 2 # Skip '<<'
37-
while commands[i] == ' ':
38-
i += 1
39-
start = i
40-
while commands[i] not in [' ', '\n']:
41-
i += 1
42-
heredoc_trigger = commands[start:i]
43-
current_command.append(commands[start - 2 : i]) # Include '<<'
44-
continue # Skip incrementing i at the end of the loop
45-
current_command.append(char)
46-
47-
elif state == IN_SINGLE_QUOTE:
48-
current_command.append(char)
49-
if char == "'" and commands[i - 1] != '\\':
50-
state = NORMAL
1+
import bashlex
512

52-
elif state == IN_DOUBLE_QUOTE:
53-
current_command.append(char)
54-
if char == '"' and commands[i - 1] != '\\':
55-
state = NORMAL
3+
from opendevin.core.logger import opendevin_logger as logger
564

57-
elif state == IN_HEREDOC:
58-
current_command.append(char)
59-
if (
60-
char == '\n'
61-
and heredoc_trigger
62-
and commands[i + 1 : i + 1 + len(heredoc_trigger) + 1]
63-
== heredoc_trigger + '\n'
64-
):
65-
# Check if the next line starts with the heredoc trigger followed by a newline
66-
i += (
67-
len(heredoc_trigger) + 1
68-
) # Move past the heredoc trigger and newline
69-
current_command.append(
70-
heredoc_trigger + '\n'
71-
) # Include the heredoc trigger and newline
72-
result.append(''.join(current_command).strip())
73-
current_command = []
74-
heredoc_trigger = None
75-
state = NORMAL
76-
continue
77-
78-
i += 1
79-
80-
# Add the last command if any
81-
if current_command:
82-
result.append(''.join(current_command).strip())
83-
84-
# Remove any empty strings from the result
85-
result = [cmd for cmd in result if cmd]
865

6+
def split_bash_commands(commands):
7+
try:
8+
parsed = bashlex.parse(commands)
9+
except bashlex.errors.ParsingError as e:
10+
logger.error(
11+
f'Failed to parse bash commands\n[input]: {commands}\n[error]: {e}'
12+
)
13+
# If parsing fails, return the original commands
14+
return [commands]
15+
16+
result: list[str] = []
17+
last_end = 0
18+
19+
for node in parsed:
20+
start, end = node.pos
21+
22+
# Include any text between the last command and this one
23+
if start > last_end:
24+
between = commands[last_end:start]
25+
logger.debug(f'BASH PARSING between: {between}')
26+
if result:
27+
result[-1] += between.rstrip()
28+
elif between.strip():
29+
# THIS SHOULD NOT HAPPEN
30+
result.append(between.rstrip())
31+
32+
# Extract the command, preserving original formatting
33+
command = commands[start:end].rstrip()
34+
logger.debug(f'BASH PARSING command: {command}')
35+
result.append(command)
36+
37+
last_end = end
38+
39+
# Add any remaining text after the last command to the last command
40+
remaining = commands[last_end:].rstrip()
41+
logger.debug(f'BASH PARSING remaining: {remaining}')
42+
if last_end < len(commands) and result:
43+
result[-1] += remaining
44+
logger.debug(f'BASH PARSING result[-1] += remaining: {result[-1]}')
45+
elif last_end < len(commands):
46+
if remaining:
47+
result.append(remaining)
48+
logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}')
8749
return result

poetry.lock

Lines changed: 13 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ pathspec = "^0.12.1"
3939
google-cloud-aiplatform = "*"
4040
grep-ast = "0.3.2"
4141
tree-sitter = "0.21.3"
42+
bashlex = "^0.18"
4243

4344
[tool.poetry.group.llama-index.dependencies]
4445
llama-index = "*"
@@ -72,6 +73,7 @@ reportlab = "*"
7273
[tool.coverage.run]
7374
concurrency = ["gevent"]
7475

76+
7577
[tool.poetry.group.runtime.dependencies]
7678
jupyterlab = "*"
7779
notebook = "*"
@@ -105,6 +107,7 @@ ignore = ["D1"]
105107
[tool.ruff.lint.pydocstyle]
106108
convention = "google"
107109

110+
108111
[tool.poetry.group.evaluation.dependencies]
109112
streamlit = "*"
110113
whatthepatch = "*"

0 commit comments

Comments
 (0)