Skip to content

Commit 0cda5f6

Browse files
rbren and li-boxuan authored
Add integration test with dummy agent (OpenHands#1316)
* first pass at dummy * add assertion to dummy * add dummy workflow * beef up tests * try and fix huggingface issue * remove newlines * rename test * move to pytest * Revert " move to pytest" This reverts commit de8121c. * fix lint * delint * Update .github/workflows/dummy-agent-test.yml Co-authored-by: Boxuan Li <[email protected]> --------- Co-authored-by: Boxuan Li <[email protected]>
1 parent eb1c3d8 commit 0cda5f6

7 files changed

Lines changed: 145 additions & 19 deletions

File tree

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# End-to-end smoke test: runs OpenDevin from the command line with the
# deterministic DummyAgent on every push.
name: Run e2e test with dummy agent

on:
  push:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Set up environment
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
          poetry install --without evaluation
          # Pre-fetch the embedding model's pooling config into the llama_index
          # cache directory (NOTE(review): added as a workaround for a Hugging Face
          # download issue, per the commit message -- confirm it is still needed).
          wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json \
            -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
      - name: Run tests
        run: |
          # The model name is a placeholder: DummyAgent is documented as making no LLM calls.
          poetry run python opendevin/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent

agenthub/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,20 @@
88
load_dotenv()
99

1010

11-
# Import agents after environment variables are loaded
11+
1212
from . import ( # noqa: E402
1313
SWE_agent,
1414
codeact_agent,
1515
delegator_agent,
16+
dummy_agent,
1617
monologue_agent,
1718
planner_agent,
1819
)
1920

2021
__all__ = ['monologue_agent', 'codeact_agent',
21-
'planner_agent', 'SWE_agent', 'delegator_agent']
22+
'planner_agent', 'SWE_agent',
23+
'delegator_agent',
24+
'dummy_agent']
2225

2326
for agent in all_microagents.values():
2427
name = agent['name']

agenthub/dummy_agent/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from opendevin.agent import Agent
2+
3+
from .agent import DummyAgent
4+
5+
# Register the agent class under the name 'DummyAgent' at import time.
# NOTE(review): presumably this is the name resolved by the `-c DummyAgent`
# command-line option used in the e2e workflow -- confirm against opendevin/main.py.
Agent.register('DummyAgent', DummyAgent)

agenthub/dummy_agent/agent.py

Lines changed: 107 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,118 @@
1-
"""Module for a Dummy agent."""
1+
import time
2+
from typing import List, TypedDict
23

3-
from typing import List
4-
5-
from opendevin.action import Action
6-
from opendevin.action.base import NullAction
4+
from opendevin.action import (
5+
Action,
6+
AddTaskAction,
7+
AgentFinishAction,
8+
AgentRecallAction,
9+
AgentThinkAction,
10+
BrowseURLAction,
11+
CmdRunAction,
12+
FileReadAction,
13+
FileWriteAction,
14+
ModifyTaskAction,
15+
)
716
from opendevin.agent import Agent
8-
from opendevin.controller.agent_controller import AgentController
9-
from opendevin.observation.base import NullObservation, Observation
17+
from opendevin.llm.llm import LLM
18+
from opendevin.observation import (
19+
AgentRecallObservation,
20+
CmdOutputObservation,
21+
FileReadObservation,
22+
FileWriteObservation,
23+
NullObservation,
24+
Observation,
25+
)
1026
from opendevin.state import State
1127

28+
"""
29+
FIXME: Writing this agent surfaced a few problems:
30+
* FileWrites seem to add an unintended newline at the end of the file
31+
* command_id is sometimes a number, sometimes a string
32+
* Why isn't the output of the background command split between two steps?
33+
* Browser not working
34+
"""
35+
36+
class ActionObs(TypedDict):
    """One scripted step: an action and the observations it is expected to produce."""

    action: Action
    observations: List[Observation]


# Shell command run with background=True in the script; it prints one line,
# sleeps briefly, then prints a second line.
BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'
39+
1240

1341
class DummyAgent(Agent):
    """
    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
    without making any LLM calls.

    Each entry of ``self.steps`` pairs the action returned at that iteration with
    the observations expected at the tail of ``state.history`` on the following
    iteration. ``step`` verifies the previous entry's observations before it
    returns the next action.
    """

    def __init__(self, llm: LLM):
        """Build the fixed script of actions and their expected observations.

        Args:
            llm: Passed through to ``Agent.__init__``; not used by this class.
        """
        super().__init__(llm)
        self.steps: List[ActionObs] = [{
            'action': AddTaskAction(parent='0', goal='check the current directory'),
            'observations': [NullObservation('')],
        }, {
            'action': AddTaskAction(parent='0.0', goal='run ls'),
            'observations': [NullObservation('')],
        }, {
            'action': ModifyTaskAction(id='0.0', state='in_progress'),
            'observations': [NullObservation('')],
        }, {
            'action': AgentThinkAction(thought='Time to get started!'),
            'observations': [NullObservation('')],
        }, {
            'action': CmdRunAction(command='echo "foo"'),
            'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
        }, {
            'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
            'observations': [FileWriteObservation('', path='hello.sh')],
        }, {
            'action': FileReadAction(path='hello.sh'),
            'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],
        }, {
            'action': CmdRunAction(command='bash hello.sh'),
            'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
        }, {
            'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
            'observations': [
                CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
                CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
            ]
        }, {
            'action': AgentRecallAction(query='who am I?'),
            'observations': [
                AgentRecallObservation('', memories=['I am a computer.']),
                # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
            ],
        }, {
            'action': BrowseURLAction(url='https://google.com'),
            'observations': [
                # BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
            ],
        }, {
            'action': AgentFinishAction(),
            'observations': [],
        }]

    def step(self, state: State) -> Action:
        """Check the previous step's observations, then return the scripted action.

        Args:
            state: Current state. ``state.iteration`` selects the script entry,
                and the tail of ``state.history`` is compared with the
                observations expected from the previous entry.

        Returns:
            The action scripted for ``state.iteration``.

        Raises:
            IndexError: If ``state.iteration`` is past the end of the script.
            AssertionError: If the recorded observations differ from the
                expected ones, or the history is too short to contain them.
        """
        time.sleep(0.1)
        iteration = state.iteration
        if iteration >= len(self.steps):
            # Same exception type as plain list indexing would raise, but with context.
            raise IndexError(
                f'DummyAgent has only {len(self.steps)} scripted steps, '
                f'but step() was called for iteration {iteration}'
            )
        if iteration > 0:
            self._verify_observations(state.history, self.steps[iteration - 1]['observations'])
        return self.steps[iteration]['action']

    @staticmethod
    def _normalize_observation(obs_dict: dict) -> dict:
        """Strip fields that are not compared when a real command id is present.

        If ``extras`` holds a ``command_id`` other than -1, the id is removed
        and ``content`` is blanked, so only the remaining fields are compared.
        The dict is modified in place and returned.
        """
        extras = obs_dict['extras']
        if 'command_id' in extras and extras['command_id'] != -1:
            del extras['command_id']
            obs_dict['content'] = ''
        return obs_dict

    @classmethod
    def _verify_observations(cls, history, expected_observations: List[Observation]) -> None:
        """Compare the last entries of ``history`` with ``expected_observations``.

        Each history entry is indexed with ``[1]`` to obtain its observation.
        Raises ``AssertionError`` explicitly (rather than via ``assert``) so the
        check is not stripped when Python runs with ``-O``.
        """
        hist_start = len(history) - len(expected_observations)
        if hist_start < 0:
            # A negative start would wrap around and compare the wrong entries.
            raise AssertionError(
                f'Expected {len(expected_observations)} observations, '
                f'but history only has {len(history)} entries'
            )
        for offset, expected in enumerate(expected_observations):
            hist_obs = cls._normalize_observation(history[hist_start + offset][1].to_dict())
            expected_obs = cls._normalize_observation(expected.to_dict())
            if hist_obs != expected_obs:
                print('\nactual', hist_obs)
                print('\nexpect', expected_obs)
                raise AssertionError(f'Expected observation {expected_obs}, got {hist_obs}')

    def search_memory(self, query: str) -> List[str]:
        """Return the single canned memory, regardless of ``query``."""
        return ['I am a computer.']

docs/modules/python/agenthub/dummy_agent/agent.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@ sidebar_label: agent
33
title: agenthub.dummy_agent.agent
44
---
55

6-
Module for a Dummy agent.
7-
86
## DummyAgent Objects
97

108
```python
119
class DummyAgent(Agent)
1210
```
1311

14-
A dummy agent that does nothing but can be used in testing.
12+
The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
13+
without making any LLM calls.
1514

opendevin/action/agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction):
2222

2323
async def run(self, controller: 'AgentController') -> AgentRecallObservation:
    """Search the controller's agent memory for ``self.query``.

    Returns an AgentRecallObservation with empty content and the memories
    returned by ``controller.agent.search_memory``.
    """
    recalled = controller.agent.search_memory(self.query)
    return AgentRecallObservation(content='', memories=recalled)
2828

opendevin/sandbox/docker/exec_box.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,10 @@ def run_command(container, command):
122122
self.container.exec_run(
123123
f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR)
124124
return -1, f'Command: "{cmd}" timed out'
125-
return exit_code, logs.decode('utf-8').strip()
125+
logs_out = logs.decode('utf-8')
126+
if logs_out.endswith('\n'):
127+
logs_out = logs_out[:-1]
128+
return exit_code, logs_out
126129

127130
def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
128131
# mkdir -p sandbox_dest if it doesn't exist

0 commit comments

Comments
 (0)