Add integration test with dummy agent (OpenHands#1316)

rbren · li-boxuan · web-flow · commit 0cda5f64af65 · 2024-04-30T16:52:00.000Z
* first pass at dummy * add assertion to dummy * add dummy workflow * beef up tests * try and fix huggingface issue * remove newlines * rename test * move to pytest * Revert " move to pytest" This reverts commit de8121c. * fix lint * delint * Update .github/workflows/dummy-agent-test.yml Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> --------- Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
diff --git a/.github/workflows/dummy-agent-test.yml b/.github/workflows/dummy-agent-test.yml
@@ -0,0 +1,21 @@
+name: Run e2e test with dummy agent
+
+on: [push]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      # Install Poetry and the project dependencies (skipping the `evaluation` group), then
+      # download the pooling config for BAAI/bge-small-en-v1.5 into /tmp/llama_index so it is
+      # already on disk when the app runs -- presumably a workaround for a Hugging Face
+      # download problem in CI; confirm before removing.
+      - name: Set up environment
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          poetry install --without evaluation
+          wget https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json -P /tmp/llama_index/models--BAAI--bge-small-en-v1.5/snapshots/5c38ec7c405ec4b44b94cc5a9bb96e735b38267a/1_Pooling/
+      # Run the app end to end with the scripted DummyAgent. The model name is a placeholder:
+      # DummyAgent is documented as making no LLM calls.
+      - name: Run tests
+        run: |
+          poetry run python opendevin/main.py -t "do a flip" -m ollama/not-a-model -d ./workspace/ -c DummyAgent
diff --git a/agenthub/__init__.py b/agenthub/__init__.py
@@ -8,17 +8,20 @@
 load_dotenv()
 
 
-# Import agents after environment variables are loaded
+
 from . import (  # noqa: E402
     SWE_agent,
     codeact_agent,
     delegator_agent,
+    dummy_agent,
     monologue_agent,
     planner_agent,
 )
 
 __all__ = ['monologue_agent', 'codeact_agent',
-           'planner_agent', 'SWE_agent', 'delegator_agent']
+           'planner_agent', 'SWE_agent',
+           'delegator_agent',
+           'dummy_agent']
 
 for agent in all_microagents.values():
     name = agent['name']
diff --git a/agenthub/dummy_agent/__init__.py b/agenthub/dummy_agent/__init__.py
@@ -0,0 +1,5 @@
+from opendevin.agent import Agent
+
+from .agent import DummyAgent
+
+Agent.register('DummyAgent', DummyAgent)
diff --git a/agenthub/dummy_agent/agent.py b/agenthub/dummy_agent/agent.py
@@ -1,23 +1,118 @@
-"""Module for a Dummy agent."""
+import time
+from typing import List, TypedDict
 
-from typing import List
-
-from opendevin.action import Action
-from opendevin.action.base import NullAction
+from opendevin.action import (
+    Action,
+    AddTaskAction,
+    AgentFinishAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    BrowseURLAction,
+    CmdRunAction,
+    FileReadAction,
+    FileWriteAction,
+    ModifyTaskAction,
+)
 from opendevin.agent import Agent
-from opendevin.controller.agent_controller import AgentController
-from opendevin.observation.base import NullObservation, Observation
+from opendevin.llm.llm import LLM
+from opendevin.observation import (
+    AgentRecallObservation,
+    CmdOutputObservation,
+    FileReadObservation,
+    FileWriteObservation,
+    NullObservation,
+    Observation,
+)
 from opendevin.state import State
 
+"""
+FIXME: Writing this agent surfaced a few problems:
+* FileWrites seem to add an unintended newline at the end of the file
+* command_id is sometimes a number, sometimes a string
+* Why isn't the output of the background command split between two steps?
+* The browser is not working
+"""
+
+# One scripted step: the action to emit, plus the observations that step() expects to find
+# at the end of the history once that action has been executed.
+ActionObs = TypedDict('ActionObs', {'action': Action, 'observations': List[Observation]})
+
+# Shell command used by the scripted step that runs a command in the background.
+BACKGROUND_CMD = 'echo "This is in the background" && sleep .1 && echo "This too"'
+
 
 class DummyAgent(Agent):
-    """A dummy agent that does nothing but can be used in testing."""
+    """
+    The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
+    without making any LLM calls.
+    """
 
-    async def run(self, controller: AgentController) -> Observation:
-        return NullObservation('')
+    def __init__(self, llm: LLM):
+        """
+        Build the fixed script of actions and, for each one, the observations
+        that ``step`` expects to find at the end of the history afterwards.
+
+        Args:
+            llm: forwarded to ``Agent.__init__``; ``step`` itself never uses it.
+        """
+        super().__init__(llm)
+        self.steps: List[ActionObs] = [{
+            'action': AddTaskAction(parent='0', goal='check the current directory'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': AddTaskAction(parent='0.0', goal='run ls'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': ModifyTaskAction(id='0.0', state='in_progress'),
+            'observations': [NullObservation('')],
+        }, {
+            'action': AgentThinkAction(thought='Time to get started!'),
+            'observations': [NullObservation('')],
+        }, {
+            # command_id=-1: step() compares these observations verbatim, content included.
+            'action': CmdRunAction(command='echo "foo"'),
+            'observations': [CmdOutputObservation('foo', command_id=-1, command='echo "foo"')],
+        }, {
+            'action': FileWriteAction(content='echo "Hello, World!"', path='hello.sh'),
+            'observations': [FileWriteObservation('', path='hello.sh')],
+        }, {
+            # The expected content has a trailing newline that the written content did not
+            # have (see the FIXME note at the top of this module).
+            'action': FileReadAction(path='hello.sh'),
+            'observations': [FileReadObservation('echo "Hello, World!"\n', path='hello.sh')],
+        }, {
+            'action': CmdRunAction(command='bash hello.sh'),
+            'observations': [CmdOutputObservation('Hello, World!', command_id=-1, command='bash hello.sh')],
+        }, {
+            # For command ids other than -1, step() drops command_id and blanks the content on
+            # both sides before comparing, so only the remaining fields are actually checked.
+            'action': CmdRunAction(command=BACKGROUND_CMD, background=True),
+            'observations': [
+                CmdOutputObservation('Background command started. To stop it, send a `kill` action with id 42', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
+                CmdOutputObservation('This is in the background\nThis too\n', command_id='42', command=BACKGROUND_CMD),  # type: ignore[arg-type]
+            ]
+        }, {
+            # The expected memory matches what search_memory() returns.
+            'action': AgentRecallAction(query='who am I?'),
+            'observations': [
+                AgentRecallObservation('', memories=['I am a computer.']),
+                # CmdOutputObservation('This too\n', command_id='42', command=BACKGROUND_CMD),
+            ],
+        }, {
+            # No observation is checked for the browse step: the expected one is commented out
+            # (see the FIXME note about the browser at the top of this module).
+            'action': BrowseURLAction(url='https://google.com'),
+            'observations': [
+                # BrowserOutputObservation('<html></html>', url='https://google.com', screenshot=""),
+            ],
+        }, {
+            'action': AgentFinishAction(),
+            'observations': [],
+        }]
 
     def step(self, state: State) -> Action:
-        return NullAction('')
+        """
+        Check the observations produced by the previous scripted step, then
+        return the action scripted for the current iteration.
+
+        The last ``len(expected)`` entries of ``state.history`` are compared,
+        in order, with the observations listed for step ``state.iteration - 1``.
+        Any observation whose ``command_id`` is not -1 has that id removed and
+        its content blanked on both sides before the comparison.
+
+        Args:
+            state: current controller state; ``iteration`` selects the scripted
+                step and ``history`` holds (action, observation) entries.
+
+        Returns:
+            The action scripted for ``state.iteration``.
+
+        Raises:
+            AssertionError: if the history holds fewer entries than expected,
+                or an observation differs from the scripted one.
+            IndexError: if ``state.iteration`` is past the end of the script.
+        """
+        # NOTE(review): presumably gives the background command time to produce
+        # its output before the next check -- confirm.
+        time.sleep(0.1)
+        if state.iteration > 0:
+            expected_observations = self.steps[state.iteration - 1]['observations']
+            hist_start = len(state.history) - len(expected_observations)
+            # A negative start index would silently wrap around and compare the
+            # wrong history entries, so fail loudly instead.
+            assert hist_start >= 0, (
+                f'Expected {len(expected_observations)} observations, '
+                f'but history only has {len(state.history)} entries'
+            )
+            recent_history = state.history[hist_start:]
+            for entry, expected in zip(recent_history, expected_observations):
+                hist_obs = entry[1].to_dict()
+                expected_obs = expected.to_dict()
+                # Apply the same normalisation to both sides: ids other than -1
+                # (and the content that goes with them) are excluded from the
+                # comparison -- see the FIXME note at the top of this module.
+                for obs in (hist_obs, expected_obs):
+                    if 'command_id' in obs['extras'] and obs['extras']['command_id'] != -1:
+                        del obs['extras']['command_id']
+                        obs['content'] = ''
+                if hist_obs != expected_obs:
+                    print('\nactual', hist_obs)
+                    print('\nexpect', expected_obs)
+                assert hist_obs == expected_obs, f'Expected observation {expected_obs}, got {hist_obs}'
+        return self.steps[state.iteration]['action']
 
     def search_memory(self, query: str) -> List[str]:
-        return []
+        """
+        Return a fixed memory regardless of ``query``; it matches the memories
+        listed in the scripted AgentRecallObservation.
+        """
+        return ['I am a computer.']
diff --git a/docs/modules/python/agenthub/dummy_agent/agent.md b/docs/modules/python/agenthub/dummy_agent/agent.md
@@ -3,13 +3,12 @@ sidebar_label: agent
 title: agenthub.dummy_agent.agent
 ---
 
-Module for a Dummy agent.
-
 ## DummyAgent Objects
 
 ```python
 class DummyAgent(Agent)
 ```
 
-A dummy agent that does nothing but can be used in testing.
+The DummyAgent is used for e2e testing. It just sends the same set of actions deterministically,
+without making any LLM calls.
 
diff --git a/opendevin/action/agent.py b/opendevin/action/agent.py
@@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction):
 
     async def run(self, controller: 'AgentController') -> AgentRecallObservation:
         return AgentRecallObservation(
-            content='Recalling memories...',
+            content='',
             memories=controller.agent.search_memory(self.query),
         )
 
diff --git a/opendevin/sandbox/docker/exec_box.py b/opendevin/sandbox/docker/exec_box.py
@@ -122,7 +122,10 @@ def run_command(container, command):
                     self.container.exec_run(
                         f'kill -9 {pid}', workdir=SANDBOX_WORKSPACE_DIR)
                 return -1, f'Command: "{cmd}" timed out'
-        return exit_code, logs.decode('utf-8').strip()
+        logs_out = logs.decode('utf-8')
+        if logs_out.endswith('\n'):
+            logs_out = logs_out[:-1]
+        return exit_code, logs_out
 
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
         # mkdir -p sandbox_dest if it doesn't exist

Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@ class AgentRecallAction(ExecutableAction):`
`22`	`22`
`23`	`23`	`async def run(self, controller: 'AgentController') -> AgentRecallObservation:`
`24`	`24`	`return AgentRecallObservation(`
`25`		`- content='Recalling memories...',`
	`25`	`+ content='',`
`26`	`26`	`memories=controller.agent.search_memory(self.query),`
`27`	`27`	`)`
`28`	`28`