Skip to content

Commit b431fce

Browse files
tobitegeenyst
andauthored
tests: more Agentskills tests; updated .gitignore (OpenHands#2307)
* added tests related to backticks * updated .gitignore * added extra linter test for OpenHands#2210 * hotfix for integration test --------- Co-authored-by: Engel Nyst <[email protected]>
1 parent 6aba337 commit b431fce

7 files changed

Lines changed: 205 additions & 28 deletions

File tree

.gitignore

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,14 @@ cython_debug/
161161
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
162162
.idea/
163163
.vscode/
164+
.cursorignore
164165

165166
# evaluation
167+
evaluation/evaluation_outputs
168+
evaluation/outputs
169+
evaluation/swe_bench/eval_workspace*
166170
evaluation/SWE-bench/data
171+
evaluation/webarena/scripts/webarena_env.sh
167172

168173
# frontend
169174

@@ -176,6 +181,8 @@ frontend/yarn.lock
176181

177182
# testing
178183
frontend/coverage
184+
test_results*
185+
/_test_files_tmp/
179186

180187
# production
181188
frontend/build
@@ -204,9 +211,3 @@ cache
204211
# configuration
205212
config.toml
206213
config.toml.bak
207-
evaluation/swe_bench/eval_workspace*
208-
evaluation/outputs
209-
evaluation/evaluation_outputs
210-
test_results*
211-
/_test_files_tmp/
212-
evaluation/webarena/scripts/webarena_env.sh

agenthub/browsing_agent/browsing_agent.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,7 @@ def step(self, state: State) -> Action:
117117
error_prefix = ''
118118
last_obs = None
119119
last_action = None
120-
if len(state.history) == 1:
121-
# initialize and retrieve the first observation by issuing an noop OP
122-
# TODO: need more elegant way of doing this
123-
return BrowseInteractiveAction(browser_actions='noop()')
120+
124121
for prev_action, obs in state.history:
125122
if isinstance(prev_action, BrowseInteractiveAction):
126123
prev_actions.append(prev_action.browser_actions)
@@ -133,7 +130,7 @@ def step(self, state: State) -> Action:
133130
# agent has responded, task finish.
134131
return AgentFinishAction(outputs={'content': prev_action.content})
135132

136-
prev_action_str = '\n'.join(prev_actions[1:])
133+
prev_action_str = '\n'.join(prev_actions)
137134
# if the final BrowserInteractiveAction exec BrowserGym's send_msg_to_user,
138135
# we should also send a message back to the user in OpenDevin and call it a day
139136
if (

opendevin/runtime/plugins/agent_skills/agentskills.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434

3535
ENABLE_AUTO_LINT = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true'
3636

37+
# This is also used in unit tests!
38+
MSG_FILE_UPDATED = '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
39+
3740
# OPENAI
3841
OPENAI_API_KEY = os.getenv(
3942
'OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', '')
@@ -311,6 +314,7 @@ def edit_file(start: int, end: int, content: str) -> None:
311314

312315
lint_error = _lint_file(CURRENT_FILE)
313316
if lint_error:
317+
# only change any literal strings here in combination with unit tests!
314318
print(
315319
'[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]'
316320
)
@@ -351,9 +355,7 @@ def edit_file(start: int, end: int, content: str) -> None:
351355
f'[File: {os.path.abspath(CURRENT_FILE)} ({n_total_lines} lines total after edit)]'
352356
)
353357
_print_window(CURRENT_FILE, CURRENT_LINE, WINDOW)
354-
print(
355-
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
356-
)
358+
print(MSG_FILE_UPDATED)
357359

358360

359361
@update_pwd_decorator

tests/integration/mock/BrowsingAgent/test_browse_internet/prompt_001.log

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ Don't execute multiple actions at once if you need feedback from the page.
114114
----------
115115

116116
# Current Accessibility Tree:
117-
RootWebArea '', focused
117+
118118

119119
# Previous Actions
120120

tests/integration/mock/CodeActAgent/test_browse_internet/prompt_003.log

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ RootWebArea 'The Ultimate Answer', focused
121121
[10] button 'Click me', clickable
122122

123123
# Previous Actions
124-
noop()
124+
goto('http://localhost:8000')
125125

126126
Here is an example with chain of thought of a valid action when clicking on a button:
127127
"

tests/integration/mock/CodeActAgent/test_browse_internet/prompt_004.log

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ RootWebArea 'The Ultimate Answer', focused
122122
StaticText 'The answer is OpenDevin is all you need!'
123123

124124
# Previous Actions
125-
noop()
125+
goto('http://localhost:8000')
126126
click("10")
127127

128128
Here is an example with chain of thought of a valid action when clicking on a button:

tests/unit/test_agent_skill.py

Lines changed: 188 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import pytest
77

88
from opendevin.runtime.plugins.agent_skills.agentskills import (
9+
MSG_FILE_UPDATED,
10+
_print_window,
911
create_file,
1012
edit_file,
1113
find_file,
@@ -274,6 +276,127 @@ def test_scroll_down_edge(tmp_path):
274276
assert result.split('\n') == expected.split('\n')
275277

276278

279+
def test_print_window_internal(tmp_path):
280+
test_file_path = tmp_path / 'a.txt'
281+
create_file(str(test_file_path))
282+
open_file(str(test_file_path))
283+
with open(test_file_path, 'w') as file:
284+
for i in range(1, 101):
285+
file.write(f'Line `{i}`\n')
286+
287+
# Define the parameters for the test
288+
current_line = 50
289+
window = 2
290+
291+
# Test _print_window especially with backticks
292+
with io.StringIO() as buf:
293+
with contextlib.redirect_stdout(buf):
294+
_print_window(str(test_file_path), current_line, window, return_str=False)
295+
result = buf.getvalue()
296+
expected = (
297+
'(49 more lines above)\n'
298+
'50|Line `50`\n'
299+
'51|Line `51`\n'
300+
'(49 more lines below)\n'
301+
)
302+
assert result == expected
303+
304+
305+
def test_edit_file_window(tmp_path, monkeypatch):
306+
# Set environment variable via monkeypatch does NOT work!
307+
monkeypatch.setattr(
308+
'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', True
309+
)
310+
311+
content = """def any_int(a, b, c):
312+
return isinstance(a, int) and isinstance(b, int) and isinstance(c, int)
313+
314+
def test_any_int():
315+
assert any_int(1, 2, 3) == True
316+
assert any_int(1.5, 2, 3) == False
317+
assert any_int(1, 2.5, 3) == False
318+
assert any_int(1, 2, 3.5) == False
319+
assert any_int(1.0, 2, 3) == False
320+
assert any_int(1, 2.0, 3) == False
321+
assert any_int(1, 2, 3.0) == False
322+
assert any_int(0, 0, 0) == True
323+
assert any_int(-1, -2, -3) == True
324+
assert any_int(1, -2, 3) == True
325+
assert any_int(1.5, -2, 3) == False
326+
assert any_int(1, -2.5, 3) == False
327+
328+
def check(any_int):
329+
# Check some simple cases
330+
assert any_int(2, 3, 1)==True, "This prints if this assert fails 1 (good for debugging!)"
331+
assert any_int(2.5, 2, 3)==False, "This prints if this assert fails 2 (good for debugging!)"
332+
assert any_int(1.5, 5, 3.5)==False, "This prints if this assert fails 3 (good for debugging!)"
333+
assert any_int(2, 6, 2)==False, "This prints if this assert fails 4 (good for debugging!)"
334+
assert any_int(4, 2, 2)==True, "This prints if this assert fails 5 (good for debugging!)"
335+
assert any_int(2.2, 2.2, 2.2)==False, "This prints if this assert fails 6 (good for debugging!)"
336+
assert any_int(-4, 6, 2)==True, "This prints if this assert fails 7 (good for debugging!)"
337+
338+
# Check some edge cases that are easy to work out by hand.
339+
assert any_int(2,1,1)==True, "This prints if this assert fails 8 (also good for debugging!)"
340+
assert any_int(3,4,7)==True, "This prints if this assert fails 9 (also good for debugging!)"
341+
assert any_int(3.0,4,7)==False, "This prints if this assert fails 10 (also good for debugging!)"
342+
343+
check(any_int)"""
344+
345+
temp_file_path = tmp_path / 'error-test.py'
346+
temp_file_path.write_text(content)
347+
348+
open_file(str(temp_file_path))
349+
350+
with io.StringIO() as buf:
351+
with contextlib.redirect_stdout(buf):
352+
edit_file(
353+
start=9, end=9, content=' assert any_int(1.0, 2, 3) == False'
354+
)
355+
result = buf.getvalue()
356+
expected = (
357+
'[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
358+
'ERRORS:\n'
359+
+ str(temp_file_path)
360+
+ ':9:9: '
361+
+ 'E999 IndentationError: unexpected indent\n'
362+
'[This is how your edit would have looked if applied]\n'
363+
'-------------------------------------------------\n'
364+
'(5 more lines above)\n'
365+
'6| assert any_int(1.5, 2, 3) == False\n'
366+
'7| assert any_int(1, 2.5, 3) == False\n'
367+
'8| assert any_int(1, 2, 3.5) == False\n'
368+
'9| assert any_int(1.0, 2, 3) == False\n'
369+
'10| assert any_int(1, 2.0, 3) == False\n'
370+
'11| assert any_int(1, 2, 3.0) == False\n'
371+
'12| assert any_int(0, 0, 0) == True\n'
372+
'13| assert any_int(-1, -2, -3) == True\n'
373+
'14| assert any_int(1, -2, 3) == True\n'
374+
'15| assert any_int(1.5, -2, 3) == False\n'
375+
'(18 more lines below)\n'
376+
'-------------------------------------------------\n'
377+
'\n'
378+
'[This is the original code before your edit]\n'
379+
'-------------------------------------------------\n'
380+
'(5 more lines above)\n'
381+
'6| assert any_int(1.5, 2, 3) == False\n'
382+
'7| assert any_int(1, 2.5, 3) == False\n'
383+
'8| assert any_int(1, 2, 3.5) == False\n'
384+
'9| assert any_int(1.0, 2, 3) == False\n'
385+
'10| assert any_int(1, 2.0, 3) == False\n'
386+
'11| assert any_int(1, 2, 3.0) == False\n'
387+
'12| assert any_int(0, 0, 0) == True\n'
388+
'13| assert any_int(-1, -2, -3) == True\n'
389+
'14| assert any_int(1, -2, 3) == True\n'
390+
'15| assert any_int(1.5, -2, 3) == False\n'
391+
'(18 more lines below)\n'
392+
'-------------------------------------------------\n'
393+
'Your changes have NOT been applied. Please fix your edit command and try again.\n'
394+
'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
395+
'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
396+
)
397+
assert result == expected
398+
399+
277400
def test_edit_file(tmp_path):
278401
temp_file_path = tmp_path / 'a.txt'
279402
content = 'Line 1\nLine 2\nLine 3\nLine 4\nLine 5'
@@ -289,8 +412,7 @@ def test_edit_file(tmp_path):
289412
f'[File: {temp_file_path} (3 lines total after edit)]\n'
290413
'1|REPLACE TEXT\n'
291414
'2|Line 4\n'
292-
'3|Line 5\n'
293-
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
415+
'3|Line 5\n' + MSG_FILE_UPDATED + '\n'
294416
)
295417
assert result.split('\n') == expected.split('\n')
296418

@@ -313,8 +435,7 @@ def test_edit_file_from_scratch(tmp_path):
313435
result = buf.getvalue()
314436
expected = (
315437
f'[File: {temp_file_path} (1 lines total after edit)]\n'
316-
'1|REPLACE TEXT\n'
317-
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
438+
'1|REPLACE TEXT\n' + MSG_FILE_UPDATED + '\n'
318439
)
319440
assert result.split('\n') == expected.split('\n')
320441

@@ -324,6 +445,65 @@ def test_edit_file_from_scratch(tmp_path):
324445
assert lines[0].rstrip() == 'REPLACE TEXT'
325446

326447

448+
def test_edit_file_from_scratch_multiline_with_backticks_and_second_edit(tmp_path):
449+
temp_file_path = tmp_path / 'a.txt'
450+
create_file(str(temp_file_path))
451+
open_file(str(temp_file_path))
452+
453+
with io.StringIO() as buf:
454+
with contextlib.redirect_stdout(buf):
455+
edit_file(
456+
1,
457+
1,
458+
'`REPLACE TEXT1`\n`REPLACE TEXT2`\n`REPLACE TEXT3`',
459+
)
460+
result = buf.getvalue()
461+
expected = (
462+
f'[File: {temp_file_path} (3 lines total after edit)]\n'
463+
'1|`REPLACE TEXT1`\n'
464+
'2|`REPLACE TEXT2`\n'
465+
'3|`REPLACE TEXT3`\n' + MSG_FILE_UPDATED + '\n'
466+
)
467+
assert result.split('\n') == expected.split('\n')
468+
469+
with open(temp_file_path, 'r') as file:
470+
lines = file.readlines()
471+
assert len(lines) == 3
472+
assert lines[0].rstrip() == '`REPLACE TEXT1`'
473+
assert lines[1].rstrip() == '`REPLACE TEXT2`'
474+
assert lines[2].rstrip() == '`REPLACE TEXT3`'
475+
476+
# Check that no backticks are escaped in the edit_file call
477+
assert '\\`' not in result
478+
479+
# Perform a second edit
480+
with io.StringIO() as buf:
481+
with contextlib.redirect_stdout(buf):
482+
edit_file(
483+
1,
484+
3,
485+
'`REPLACED TEXT1`\n`REPLACED TEXT2`\n`REPLACED TEXT3`',
486+
)
487+
second_result = buf.getvalue()
488+
second_expected = (
489+
f'[File: {temp_file_path} (3 lines total after edit)]\n'
490+
'1|`REPLACED TEXT1`\n'
491+
'2|`REPLACED TEXT2`\n'
492+
'3|`REPLACED TEXT3`\n' + MSG_FILE_UPDATED + '\n'
493+
)
494+
assert second_result.split('\n') == second_expected.split('\n')
495+
496+
with open(temp_file_path, 'r') as file:
497+
lines = file.readlines()
498+
assert len(lines) == 3
499+
assert lines[0].rstrip() == '`REPLACED TEXT1`'
500+
assert lines[1].rstrip() == '`REPLACED TEXT2`'
501+
assert lines[2].rstrip() == '`REPLACED TEXT3`'
502+
503+
# Check that no backticks are escaped in the second edit_file call
504+
assert '\\`' not in second_result
505+
506+
327507
def test_edit_file_from_scratch_multiline(tmp_path):
328508
temp_file_path = tmp_path / 'a.txt'
329509
create_file(str(temp_file_path))
@@ -341,8 +521,7 @@ def test_edit_file_from_scratch_multiline(tmp_path):
341521
f'[File: {temp_file_path} (3 lines total after edit)]\n'
342522
'1|REPLACE TEXT1\n'
343523
'2|REPLACE TEXT2\n'
344-
'3|REPLACE TEXT3\n'
345-
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
524+
'3|REPLACE TEXT3\n' + MSG_FILE_UPDATED + '\n'
346525
)
347526
assert result.split('\n') == expected.split('\n')
348527

@@ -550,8 +729,7 @@ def test_edit_lint_file_pass(tmp_path, monkeypatch):
550729
'1|\n'
551730
f'[File: {file_path} (2 lines total after edit)]\n'
552731
"1|print('hello')\n"
553-
'2|\n'
554-
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
732+
'2|\n' + MSG_FILE_UPDATED + '\n'
555733
)
556734
assert result.split('\n') == expected.split('\n')
557735

@@ -663,7 +841,7 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):
663841
file_path = tmp_path / 'test_file.py'
664842
file_path.write_text('\n')
665843

666-
# Set environment variable to enable linting
844+
# Set environment variable to disable linting
667845
monkeypatch.setattr(
668846
'opendevin.runtime.plugins.agent_skills.agentskills.ENABLE_AUTO_LINT', False
669847
)
@@ -678,8 +856,7 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):
678856
'1|\n'
679857
f'[File: {file_path} (2 lines total after edit)]\n'
680858
'1|undefined_name()\n'
681-
'2|\n'
682-
'[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
859+
'2|\n' + MSG_FILE_UPDATED + '\n'
683860
)
684861
assert result.split('\n') == expected.split('\n')
685862

0 commit comments

Comments
 (0)