Skip to content

Commit ce8a11a

Browse files
xingyaoww and tobitege authored
[Arch] Shrink runtime image size (OpenHands#3051)
* test_runtime_client.py to test _execute_bash() * runtime_build and runtime tweaks * fix in docker script * revert bash changes * use sandbox_config.update_source_code to control source code update * add od_version to the sandbox tag * add doc instruction for update source code * do not remove whole poetry folder; add mamba clean * add missing newlines --------- Co-authored-by: tobitege <[email protected]>
1 parent f3c23e8 commit ce8a11a

6 files changed

Lines changed: 129 additions & 38 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,13 @@ cache
210210

211211
# configuration
212212
config.toml
213+
config.toml_
213214
config.toml.bak
214215

215216
containers/agnostic_sandbox
216217

217218
# swe-bench-eval
218219
image_build_logs
219220
run_instance_logs
221+
222+
od_runtime_*.tar

opendevin/core/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ class SandboxConfig(metaclass=Singleton):
142142
enable_auto_lint: Whether to enable auto-lint.
143143
use_host_network: Whether to use the host network.
144144
initialize_plugins: Whether to initialize plugins.
145+
update_source_code: Whether to update the source code in the EventStreamRuntime.
146+
Used for development of EventStreamRuntime.
145147
"""
146148

147149
box_type: str = 'ssh'
@@ -157,6 +159,7 @@ class SandboxConfig(metaclass=Singleton):
157159
)
158160
use_host_network: bool = False
159161
initialize_plugins: bool = True
162+
update_source_code: bool = False
160163

161164
def defaults_to_dict(self) -> dict:
162165
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

opendevin/runtime/client/client.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
"""
2+
This is the main file for the runtime client.
3+
It is responsible for executing actions received from OpenDevin backend and producing observations.
4+
5+
NOTE: this will be executed inside the docker sandbox.
6+
7+
If you already have a pre-built docker image but have changed the code in this file OR its dependencies, you need to rebuild the docker image to update the source code.
8+
9+
You should add SANDBOX_UPDATE_SOURCE_CODE=True to any `python XXX.py` command you run to update the source code.
10+
"""
11+
112
import argparse
213
import asyncio
314
import os

opendevin/runtime/client/runtime.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,15 @@ async def ainit(self, env_vars: dict[str, str] | None = None):
8181
# NOTE: You need to set DEBUG=true to update the source code
8282
# inside the container. This is useful when you want to test/debug the
8383
# latest code in the runtime docker container.
84-
update_source_code=config.debug,
84+
update_source_code=self.sandbox_config.update_source_code,
8585
)
8686
self.container = await self._init_container(
8787
self.sandbox_workspace_dir,
8888
mount_dir=config.workspace_mount_path,
8989
plugins=self.plugins,
9090
)
91-
# Initialize the env vars
91+
# MUST call super().ainit() to initialize both default env vars
92+
# AND the ones in env vars!
9293
await super().ainit(env_vars)
9394

9495
@staticmethod

opendevin/runtime/runtime.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ async def ainit(self, env_vars: dict[str, str] | None = None) -> None:
7474
7575
This method should be called after the runtime's constructor.
7676
"""
77-
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
78-
await self.add_env_vars(self.DEFAULT_ENV_VARS)
77+
if self.DEFAULT_ENV_VARS:
78+
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
79+
await self.add_env_vars(self.DEFAULT_ENV_VARS)
7980
if env_vars is not None:
8081
logger.debug(f'Adding provided env vars: {env_vars}')
8182
await self.add_env_vars(env_vars)

opendevin/runtime/utils/runtime_build.py

Lines changed: 106 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,23 @@
33
import shutil
44
import subprocess
55
import tempfile
6-
from importlib.metadata import version
76

87
import docker
8+
import toml
99

1010
import opendevin
1111
from opendevin.core.logger import opendevin_logger as logger
1212

1313

14+
def _get_package_version():
15+
"""Read the version from pyproject.toml as the other one may be outdated."""
16+
project_root = os.path.dirname(os.path.dirname(os.path.abspath(opendevin.__file__)))
17+
pyproject_path = os.path.join(project_root, 'pyproject.toml')
18+
with open(pyproject_path, 'r') as f:
19+
pyproject_data = toml.load(f)
20+
return pyproject_data['tool']['poetry']['version']
21+
22+
1423
def _create_project_source_dist():
1524
"""Create a source distribution of the project. Return the path to the tarball."""
1625
# Copy the project directory to the container
@@ -24,8 +33,10 @@ def _create_project_source_dist():
2433
logger.error(f'Build failed: {result}')
2534
raise Exception(f'Build failed: {result}')
2635

36+
# Fetch the correct version from pyproject.toml
37+
package_version = _get_package_version()
2738
tarball_path = os.path.join(
28-
project_root, 'dist', f'opendevin-{version("opendevin")}.tar.gz'
39+
project_root, 'dist', f'opendevin-{package_version}.tar.gz'
2940
)
3041
if not os.path.exists(tarball_path):
3142
logger.error(f'Source distribution not found at {tarball_path}')
@@ -60,44 +71,64 @@ def _generate_dockerfile(
6071
if skip_init:
6172
dockerfile_content = f'FROM {base_image}\n'
6273
else:
74+
# Ubuntu 22.x has libgl1-mesa-glx, but 24.x and above have libgl1!
75+
if 'ubuntu' in base_image and (
76+
base_image.endswith(':latest') or base_image.endswith(':24.04')
77+
):
78+
LIBGL_MESA = 'libgl1'
79+
else:
80+
LIBGL_MESA = 'libgl1-mesa-glx'
81+
6382
dockerfile_content = (
6483
f'FROM {base_image}\n'
65-
# FIXME: make this more generic / cross-platform
66-
# Install necessary packages
67-
# libgl1-mesa-glx is extra dependency for OpenCV
68-
'RUN apt-get update && apt-get install -y wget sudo libgl1-mesa-glx\n'
69-
'RUN apt-get clean && rm -rf /var/lib/apt/lists/*\n' # Clean up the apt cache to reduce image size
70-
'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
71-
'RUN echo "" > /opendevin/bash.bashrc\n'
72-
'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
73-
' wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \\\n'
74-
' bash Miniforge3.sh -b -p /opendevin/miniforge3 && \\\n'
75-
' rm Miniforge3.sh && \\\n'
76-
' chmod -R g+w /opendevin/miniforge3 && \\\n'
77-
' bash -c ". /opendevin/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"; \\\n'
78-
' fi\n'
84+
# Install necessary packages and clean up in one layer
85+
f'RUN apt-get update && apt-get install -y wget sudo apt-utils {LIBGL_MESA} libasound2-plugins && \\\n'
86+
f' apt-get clean && rm -rf /var/lib/apt/lists/*\n'
87+
# Create necessary directories
88+
f'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs && \\\n'
89+
f' echo "" > /opendevin/bash.bashrc\n'
90+
# Install Miniforge3
91+
f'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
92+
f' wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \\\n'
93+
f' bash Miniforge3.sh -b -p /opendevin/miniforge3 && \\\n'
94+
f' rm Miniforge3.sh && \\\n'
95+
f' chmod -R g+w /opendevin/miniforge3 && \\\n'
96+
f' bash -c ". /opendevin/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"; \\\n'
97+
f' fi\n'
7998
'RUN /opendevin/miniforge3/bin/mamba install python=3.11 -y\n'
8099
'RUN /opendevin/miniforge3/bin/mamba install conda-forge::poetry -y\n'
81100
)
82101

83102
# Copy the project directory to the container
84103
dockerfile_content += 'COPY project.tar.gz /opendevin\n'
85-
# remove /opendevin/code if it exists
104+
# Remove /opendevin/code if it exists
86105
dockerfile_content += (
87106
'RUN if [ -d /opendevin/code ]; then rm -rf /opendevin/code; fi\n'
88107
)
89-
# unzip the tarball to /opendevin/code
108+
# Unzip the tarball to /opendevin/code
90109
dockerfile_content += (
91110
'RUN cd /opendevin && tar -xzvf project.tar.gz && rm project.tar.gz\n'
92111
)
93112
dockerfile_content += f'RUN mv /opendevin/{source_code_dirname} /opendevin/code\n'
94-
# install (or update) the dependencies
113+
114+
# ALTERNATIVE, but maybe not complete? (toml error!)
95115
dockerfile_content += (
96116
'RUN cd /opendevin/code && '
97117
'/opendevin/miniforge3/bin/mamba run -n base poetry env use python3.11 && '
98-
'/opendevin/miniforge3/bin/mamba run -n base poetry install\n'
99-
# for browser (update if needed)
100-
'RUN apt-get update && cd /opendevin/code && /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium\n'
118+
'/opendevin/miniforge3/bin/mamba run -n base poetry install --no-interaction --no-root\n'
119+
'RUN /opendevin/miniforge3/bin/mamba run -n base poetry cache clear --all . && \\\n'
120+
'apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* &&\\\n'
121+
'/opendevin/miniforge3/bin/mamba clean --all\n'
122+
)
123+
124+
# For browser (update if needed)
125+
dockerfile_content += (
126+
'RUN apt-get update && \\\n'
127+
' cd /opendevin/code && \\\n'
128+
' /opendevin/miniforge3/bin/mamba run -n base poetry run pip install playwright && \\\n'
129+
' /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium && \\\n'
130+
' apt-get clean && \\\n'
131+
' rm -rf /var/lib/apt/lists/*\n'
101132
)
102133
return dockerfile_content
103134

@@ -176,69 +207,110 @@ def _get_new_image_name(base_image: str, dev_mode: bool = False) -> str:
176207
base_image = base_image + ':latest'
177208
[repo, tag] = base_image.split(':')
178209
repo = repo.replace('/', '___')
179-
return f'{prefix}:{repo}_tag_{tag}'
210+
211+
od_version = _get_package_version()
212+
return f'{prefix}:od_v{od_version}_image_{repo}_tag_{tag}'
180213

181214

182215
def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool:
183216
images = docker_client.images.list()
184-
for image in images:
185-
if image_name in image.tags:
186-
return True
217+
if images:
218+
for image in images:
219+
if image_name in image.tags:
220+
return True
187221
return False
188222

189223

190224
def build_runtime_image(
191225
base_image: str,
192226
docker_client: docker.DockerClient,
193227
update_source_code: bool = False,
228+
save_to_local_store: bool = False, # New parameter to control saving to local store
194229
) -> str:
195230
"""Build the runtime image for the OpenDevin runtime.
196231
197232
This is only used for **eventstream runtime**.
198233
"""
199234
new_image_name = _get_new_image_name(base_image)
235+
logger.info(f'New image name: {new_image_name}')
236+
237+
# Ensure new_image_name contains a colon
238+
if ':' not in new_image_name:
239+
raise ValueError(
240+
f'Invalid image name: {new_image_name}. Expected format "repository:tag".'
241+
)
200242

201243
# Try to pull the new image from the registry
202244
try:
203245
docker_client.images.pull(new_image_name)
204-
except Exception as e:
205-
logger.info(f'Error pulling image {new_image_name}, building it from scratch')
206-
logger.info(f'Non-fatal error: {e}')
246+
except Exception:
247+
logger.info(f'Cannot pull image {new_image_name} directly')
207248

208249
# Detect if the sandbox image is built
209250
image_exists = _check_image_exists(new_image_name, docker_client)
251+
if image_exists:
252+
logger.info(f'Image {new_image_name} exists')
253+
else:
254+
logger.info(f'Image {new_image_name} does not exist')
210255

211256
skip_init = False
212257
if image_exists and not update_source_code:
213258
# If (1) Image exists & we are not updating the source code, we can reuse the existing production image
259+
logger.info('No image build done (not updating source code)')
214260
return new_image_name
215261
elif image_exists and update_source_code:
216262
# If (2) Image exists & we plan to update the source code (in dev mode), we need to rebuild the image
217263
# and give it a special name
218264
# e.g., od_runtime:ubuntu_tag_latest -> od_runtime_dev:ubuntu_tag_latest
265+
logger.info('Image exists, but updating source code requested')
219266
base_image = new_image_name
220267
new_image_name = _get_new_image_name(base_image, dev_mode=True)
221268

222269
skip_init = True # since we only need to update the source code
223270
else:
224271
# If (3) Image does not exist, we need to build it from scratch
225272
# e.g., ubuntu:latest -> od_runtime:ubuntu_tag_latest
226-
skip_init = False # since we need to build the image from scratch
227-
228-
logger.info(f'Building image [{new_image_name}] from scratch')
273+
# This snippet would allow loading the image from an archive:
274+
# tar_path = f'{new_image_name.replace(":", "_")}.tar'
275+
# if os.path.exists(tar_path):
276+
# logger.info(f'Loading image from {tar_path}')
277+
# load_command = ['docker', 'load', '-i', tar_path]
278+
# subprocess.run(load_command, check=True)
279+
# logger.info(f'Image {new_image_name} loaded from {tar_path}')
280+
# return new_image_name
281+
skip_init = False
282+
283+
if not skip_init:
284+
logger.info(f'Building image [{new_image_name}] from scratch')
229285

230286
_build_sandbox_image(base_image, new_image_name, docker_client, skip_init=skip_init)
287+
288+
# Only for development: allow saving the image as an archive:
289+
if not image_exists and save_to_local_store:
290+
tar_path = f'{new_image_name.replace(":", "_")}.tar'
291+
save_command = ['docker', 'save', '-o', tar_path, new_image_name]
292+
subprocess.run(save_command, check=True)
293+
logger.info(f'Image saved to {tar_path}')
294+
295+
load_command = ['docker', 'load', '-i', tar_path]
296+
subprocess.run(load_command, check=True)
297+
logger.info(f'Image {new_image_name} loaded back into Docker from {tar_path}')
298+
231299
return new_image_name
232300

233301

234302
if __name__ == '__main__':
235303
parser = argparse.ArgumentParser()
236-
parser.add_argument('--base_image', type=str, default='ubuntu:latest')
304+
parser.add_argument('--base_image', type=str, default='ubuntu:22.04')
237305
parser.add_argument('--update_source_code', type=bool, default=False)
306+
parser.add_argument('--save_to_local_store', type=bool, default=False)
238307
args = parser.parse_args()
239308

240309
client = docker.from_env()
241310
image_name = build_runtime_image(
242-
args.base_image, client, update_source_code=args.update_source_code
311+
args.base_image,
312+
client,
313+
update_source_code=args.update_source_code,
314+
save_to_local_store=args.save_to_local_store,
243315
)
244316
print(f'\nBUILT Image: {image_name}\n')

0 commit comments

Comments
 (0)