Skip to content

Commit aa48b04

Browse files
committed
fix(ci): restore simulator test stability
1 parent 1071fc8 commit aa48b04

13 files changed

Lines changed: 213 additions & 56 deletions

File tree

dstack-util/src/system_setup.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ use luks2::{
2727
LuksAf, LuksConfig, LuksDigest, LuksHeader, LuksJson, LuksKdf, LuksKeyslot, LuksSegment,
2828
LuksSegmentSize,
2929
};
30-
use ra_rpc::{client::{CertInfo, RaClient, RaClientConfig}, Attestation};
30+
use ra_rpc::{
31+
client::{CertInfo, RaClient, RaClientConfig},
32+
Attestation,
33+
};
3134
use ra_tls::{
3235
attestation::QuoteContentType,
3336
cert::{generate_ra_cert, CertConfigV2, CertSigningRequestV2, Csr},
@@ -56,7 +59,6 @@ use ra_tls::rcgen::{KeyPair, PKCS_ECDSA_P256_SHA256};
5659
use serde_human_bytes as hex_bytes;
5760
use serde_json::Value;
5861

59-
6062
async fn sign_cert_request(
6163
cert_client: &CertRequestClient,
6264
key: &KeyPair,

guest-agent-simulator/dstack.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ log_level = "debug"
1414
keys_file = "appkeys.json"
1515
compose_file = "app-compose.json"
1616
sys_config_file = "sys-config.json"
17+
data_disks = ["/"]
1718

1819
[default.core.simulator]
1920
attestation_file = "attestation.bin"

guest-agent-simulator/src/main.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use clap::Parser;
1111
use dstack_guest_agent::{
1212
backend::PlatformBackend,
1313
config::{self, Config},
14-
AppState, run_server,
14+
run_server, AppState,
1515
};
1616
use dstack_guest_agent_rpc::{AttestResponse, GetQuoteResponse};
1717
use ra_rpc::Attestation;
@@ -61,7 +61,10 @@ impl PlatformBackend for SimulatorPlatform {
6161
}
6262

6363
fn certificate_attestation(&self, pubkey: &[u8]) -> Result<VersionedAttestation> {
64-
Ok(simulator::simulated_certificate_attestation(&self.attestation, pubkey))
64+
Ok(simulator::simulated_certificate_attestation(
65+
&self.attestation,
66+
pubkey,
67+
))
6568
}
6669

6770
fn quote_response(&self, report_data: [u8; 64], vm_config: &str) -> Result<GetQuoteResponse> {
@@ -91,21 +94,25 @@ async fn main() -> Result<()> {
9194
.extract()
9295
.context("Failed to extract simulator core config")?;
9396
warn!(attestation_file = %sim_config.simulator.attestation_file, "starting dstack guest-agent simulator");
94-
let attestation = simulator::load_versioned_attestation(&sim_config.simulator.attestation_file)?;
95-
let state = AppState::new(sim_config.core, Arc::new(SimulatorPlatform::new(attestation)))
96-
.await
97-
.context("Failed to create simulator app state")?;
97+
let attestation =
98+
simulator::load_versioned_attestation(&sim_config.simulator.attestation_file)?;
99+
let state = AppState::new(
100+
sim_config.core,
101+
Arc::new(SimulatorPlatform::new(attestation)),
102+
)
103+
.await
104+
.context("Failed to create simulator app state")?;
98105
run_server(state, figment, args.watchdog).await
99106
}
100107

101-
102108
#[cfg(test)]
103109
mod tests {
104110
use super::*;
105111

106112
fn load_fixture_platform() -> SimulatorPlatform {
107113
let fixture = simulator::load_versioned_attestation(
108-
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../guest-agent/fixtures/attestation.bin"),
114+
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
115+
.join("../guest-agent/fixtures/attestation.bin"),
109116
)
110117
.expect("fixture attestation should load");
111118
SimulatorPlatform::new(fixture)
@@ -121,7 +128,9 @@ mod tests {
121128
#[test]
122129
fn simulator_provides_certificate_attestation() {
123130
let platform = load_fixture_platform();
124-
let cert_attestation = platform.certificate_attestation(b"test-public-key").unwrap();
131+
let cert_attestation = platform
132+
.certificate_attestation(b"test-public-key")
133+
.unwrap();
125134
assert!(cert_attestation.decode_app_info(false).is_ok());
126135
let _ = platform.attestation_for_info().unwrap();
127136
}

guest-agent-simulator/src/simulator.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@ use std::path::Path;
66

77
use anyhow::{Context, Result};
88
use dstack_guest_agent_rpc::{AttestResponse, GetQuoteResponse};
9-
use std::fs;
109
use ra_rpc::Attestation;
11-
use ra_tls::attestation::{
12-
QuoteContentType, VersionedAttestation, TDX_QUOTE_REPORT_DATA_RANGE,
13-
};
10+
use ra_tls::attestation::{QuoteContentType, VersionedAttestation, TDX_QUOTE_REPORT_DATA_RANGE};
11+
use std::fs;
1412

1513
pub fn load_versioned_attestation(path: impl AsRef<Path>) -> Result<VersionedAttestation> {
1614
let path = path.as_ref();

guest-agent/src/config.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ pub fn load_config_figment(config_file: Option<&str>) -> Figment {
1616
load_config_figment_with_default(DEFAULT_CONFIG, config_file)
1717
}
1818

19-
pub fn load_config_figment_with_default(default_config: &str, config_file: Option<&str>) -> Figment {
19+
pub fn load_config_figment_with_default(
20+
default_config: &str,
21+
config_file: Option<&str>,
22+
) -> Figment {
2023
load_config("dstack", default_config, config_file, true)
2124
}
2225

guest-agent/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
use anyhow::{Context, Result};
66
use clap::Parser;
7-
use dstack_guest_agent::{config, AppState, run_server};
7+
use dstack_guest_agent::{config, run_server, AppState};
88

99
#[derive(Parser)]
1010
#[command(author, version, about, long_version = dstack_guest_agent::app_version())]

guest-agent/src/rpc_service.rs

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,6 @@ fn pad64(data: &[u8]) -> Option<[u8; 64]> {
457457
Some(padded)
458458
}
459459

460-
461460
impl RpcCall<AppState> for InternalRpcHandler {
462461
type PrpcService = DstackGuestServer<Self>;
463462

@@ -657,15 +656,18 @@ impl RpcCall<AppState> for ExternalRpcHandler {
657656
#[cfg(test)]
658657
mod tests {
659658
use super::*;
660-
use crate::{backend::PlatformBackend, config::{AppComposeWrapper, Config}};
659+
use crate::{
660+
backend::PlatformBackend,
661+
config::{AppComposeWrapper, Config},
662+
};
661663
use dstack_guest_agent_rpc::{GetAttestationForAppKeyRequest, SignRequest};
662-
use ra_tls::attestation::VersionedAttestation;
663664
use dstack_types::{AppCompose, AppKeys, KeyProvider};
664665
use ed25519_dalek::ed25519::signature::hazmat::PrehashVerifier;
665666
use ed25519_dalek::{
666667
Signature as Ed25519Signature, Verifier, VerifyingKey as Ed25519VerifyingKey,
667668
};
668669
use k256::ecdsa::{Signature as K256Signature, VerifyingKey};
670+
use ra_tls::attestation::VersionedAttestation;
669671
use sha2::Sha256;
670672
use std::collections::HashSet;
671673
use std::convert::TryFrom;
@@ -812,7 +814,8 @@ pNs85uhOZE8z2jr8Pg==
812814

813815
fn certificate_attestation(&self, pubkey: &[u8]) -> Result<VersionedAttestation> {
814816
let mut attestation = self.attestation.clone();
815-
let report_data = ra_tls::attestation::QuoteContentType::RaTlsCert.to_report_data(pubkey);
817+
let report_data =
818+
ra_tls::attestation::QuoteContentType::RaTlsCert.to_report_data(pubkey);
816819
attestation.set_report_data(report_data);
817820
Ok(attestation)
818821
}
@@ -822,15 +825,18 @@ pNs85uhOZE8z2jr8Pg==
822825
report_data: [u8; 64],
823826
vm_config: &str,
824827
) -> Result<GetQuoteResponse> {
825-
let ra_tls::attestation::VersionedAttestation::V0 { attestation } = self.attestation.clone();
828+
let ra_tls::attestation::VersionedAttestation::V0 { attestation } =
829+
self.attestation.clone();
826830
let mut attestation = attestation;
827831
let Some(quote) = attestation.tdx_quote_mut() else {
828832
return Err(anyhow::anyhow!("Quote not found"));
829833
};
830-
quote.quote[ra_tls::attestation::TDX_QUOTE_REPORT_DATA_RANGE].copy_from_slice(&report_data);
834+
quote.quote[ra_tls::attestation::TDX_QUOTE_REPORT_DATA_RANGE]
835+
.copy_from_slice(&report_data);
831836
Ok(GetQuoteResponse {
832837
quote: quote.quote.to_vec(),
833-
event_log: serde_json::to_string(&quote.event_log).context("Failed to serialize event log")?,
838+
event_log: serde_json::to_string(&quote.event_log)
839+
.context("Failed to serialize event log")?,
834840
report_data: report_data.to_vec(),
835841
vm_config: vm_config.to_string(),
836842
})
@@ -854,7 +860,10 @@ pNs85uhOZE8z2jr8Pg==
854860
cert_client: dummy_cert_client,
855861
demo_cert: RwLock::new(String::new()),
856862
platform: Arc::new(TestSimulatorPlatform {
857-
attestation: VersionedAttestation::from_scale(&std::fs::read(temp_attestation_file.path()).unwrap()).unwrap(),
863+
attestation: VersionedAttestation::from_scale(
864+
&std::fs::read(temp_attestation_file.path()).unwrap(),
865+
)
866+
.unwrap(),
858867
}),
859868
};
860869

guest-agent/src/server.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44

55
use std::{future::pending, os::unix::net::UnixListener as StdUnixListener, time::Duration};
66

7-
use anyhow::{anyhow, Context, Result};
87
use crate::config::BindAddr;
98
use crate::guest_api_service::GuestApiHandler;
109
use crate::http_routes;
1110
use crate::rpc_service::{AppState, ExternalRpcHandler, InternalRpcHandler, InternalRpcHandlerV0};
1211
use crate::socket_activation::{ActivatedSockets, ActivatedUnixListener};
12+
use anyhow::{anyhow, Context, Result};
1313
use rocket::{
1414
fairing::AdHoc,
1515
figment::Figment,
@@ -191,9 +191,15 @@ pub async fn run(state: AppState, figment: Figment, watchdog: bool) -> Result<()
191191
let internal_v0_figment = figment.clone().select("internal-v0");
192192
let internal_figment = figment.clone().select("internal");
193193
let external_figment = figment.clone().select("external");
194-
let bind_addr: BindAddr = external_figment
195-
.extract()
196-
.context("Failed to extract bind address")?;
194+
let bind_addr = if watchdog {
195+
Some(
196+
external_figment
197+
.extract::<BindAddr>()
198+
.context("Failed to extract bind address")?,
199+
)
200+
} else {
201+
None
202+
};
197203
let guest_api_figment = figment.select("guest-api");
198204

199205
let activated = ActivatedSockets::from_env();
@@ -211,7 +217,7 @@ pub async fn run(state: AppState, figment: Figment, watchdog: bool) -> Result<()
211217
_ = async {
212218
let _ = tappd_ready_rx.await;
213219
let _ = sock_ready_rx.await;
214-
if watchdog {
220+
if let Some(bind_addr) = bind_addr {
215221
run_watchdog(bind_addr.port).await;
216222
} else {
217223
pending::<()>().await;

run-tests.sh

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,73 @@
44
#
55
# SPDX-License-Identifier: Apache-2.0
66

7-
set -e
7+
set -Eeuo pipefail
88

9-
(cd sdk/simulator && ./build.sh)
9+
ROOT_DIR="$(pwd -P)"
10+
SIMULATOR_DIR="$ROOT_DIR/sdk/simulator"
11+
SIMULATOR_LOG="$SIMULATOR_DIR/dstack-simulator.log"
12+
DSTACK_SOCKET="$SIMULATOR_DIR/dstack.sock"
13+
TAPPD_SOCKET="$SIMULATOR_DIR/tappd.sock"
14+
SIMULATOR_PID=""
1015

11-
pushd sdk/simulator
12-
./dstack-simulator &
16+
# Stop the background simulator process, if one has been started.
# Does nothing while SIMULATOR_PID is empty. Failures from kill/wait are
# ignored (`|| true`), so calling this more than once is harmless.
cleanup() {
17+
if [[ -n "${SIMULATOR_PID:-}" ]]; then
18+
kill "$SIMULATOR_PID" 2>/dev/null || true
19+
# Wait for the signalled process to finish; errors are ignored.
wait "$SIMULATOR_PID" 2>/dev/null || true
20+
fi
21+
}
22+
23+
# Print the last 100 lines of the simulator log file for diagnostics.
# Prints nothing when $SIMULATOR_LOG does not exist; a failing `tail` is
# ignored so that reporting never causes an additional failure.
print_simulator_logs() {
24+
if [[ -f "$SIMULATOR_LOG" ]]; then
25+
echo "Last simulator logs:"
26+
tail -100 "$SIMULATOR_LOG" || true
27+
fi
28+
}
29+
30+
# Poll until a Unix socket exists at the given path.
#   $1 - path of the socket to wait for
#   $2 - human-readable socket name used in diagnostic messages
# Returns 0 as soon as the path is a socket. Returns 1, after printing the
# simulator logs, if the process recorded in SIMULATOR_PID is no longer
# running or if all polling attempts are used up.
wait_for_socket() {
31+
local socket_path="$1"
32+
local name="$2"
33+
34+
# Up to 100 attempts, 0.2 s apart.
for _ in {1..100}; do
35+
if [[ -S "$socket_path" ]]; then
36+
return 0
37+
fi
38+
# Fail fast: `kill -0` only probes whether the process still exists.
if [[ -n "${SIMULATOR_PID:-}" ]] && ! kill -0 "$SIMULATOR_PID" 2>/dev/null; then
39+
echo "Simulator exited before $name socket became ready."
40+
print_simulator_logs
41+
return 1
42+
fi
43+
sleep 0.2
44+
done
45+
46+
echo "Timed out waiting for $name socket at $socket_path"
47+
print_simulator_logs
48+
return 1
49+
}
50+
51+
trap 'print_simulator_logs' ERR
52+
trap cleanup EXIT INT TERM
53+
54+
rm -f "$DSTACK_SOCKET" "$TAPPD_SOCKET" "$SIMULATOR_LOG"
55+
(
56+
cd "$SIMULATOR_DIR"
57+
./build.sh
58+
)
59+
60+
# Launch the simulator in the background with stdout/stderr captured in the
# log file. `exec` replaces the subshell with the simulator binary, so the PID
# read from `$!` right after this block is the simulator itself. Without it,
# `$!` is the wrapper subshell: killing that PID can leave the simulator
# running, and `kill -0` on it does not reflect the simulator's state.
(
61+
cd "$SIMULATOR_DIR"
62+
exec ./dstack-simulator >"$SIMULATOR_LOG" 2>&1
63+
) &
1364
SIMULATOR_PID=$!
14-
trap "kill $SIMULATOR_PID 2>/dev/null || true" EXIT
1565
echo "Simulator process (PID: $SIMULATOR_PID) started."
16-
popd
1766

18-
export DSTACK_SIMULATOR_ENDPOINT=$(realpath sdk/simulator/dstack.sock)
19-
export TAPPD_SIMULATOR_ENDPOINT=$(realpath sdk/simulator/tappd.sock)
67+
wait_for_socket "$DSTACK_SOCKET" "dstack"
68+
wait_for_socket "$TAPPD_SOCKET" "tappd"
69+
70+
export DSTACK_SIMULATOR_ENDPOINT="$DSTACK_SOCKET"
71+
export TAPPD_SIMULATOR_ENDPOINT="$TAPPD_SOCKET"
2072

2173
echo "DSTACK_SIMULATOR_ENDPOINT: $DSTACK_SIMULATOR_ENDPOINT"
2274
echo "TAPPD_SIMULATOR_ENDPOINT: $TAPPD_SIMULATOR_ENDPOINT"
2375

24-
# Run the tests
2576
cargo test --all-features -- --show-output

sdk/python/tests/test_client.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
import hashlib
6+
import os
67
import warnings
78

89
from evidence_api.tdx.quote import TdxQuote
10+
import httpx
911
import pytest
1012

1113
from dstack_sdk import AsyncDstackClient
@@ -248,6 +250,14 @@ def test_unix_socket_file_not_exist():
248250
os.environ["DSTACK_SIMULATOR_ENDPOINT"] = saved_env
249251

250252

253+
# Check the outcome of an emit_event call against the current runtime mode.
# `error` is the exception captured by the caller, or None if none was raised.
# When DSTACK_SIMULATOR_ENDPOINT is set in the environment, the call is
# expected to have failed with an httpx.HTTPStatusError whose response status
# is 400; otherwise it is expected to have succeeded (error is None).
# NOTE(review): the `Exception | None` annotation is evaluated at definition
# time and needs Python 3.10+ unless annotations are deferred -- confirm the
# SDK's minimum supported Python version.
def assert_emit_event_behavior(error: Exception | None) -> None:
254+
if "DSTACK_SIMULATOR_ENDPOINT" in os.environ:
255+
assert isinstance(error, httpx.HTTPStatusError)
256+
assert error.response.status_code == 400
257+
else:
258+
assert error is None, f"emit_event unexpectedly failed: {error}"
259+
260+
251261
def test_non_unix_socket_endpoints():
252262
"""Test that client doesn't throw error for non-unix socket paths."""
253263
import os
@@ -272,17 +282,25 @@ def test_non_unix_socket_endpoints():
272282
async def test_emit_event():
273283
"""Test emit event functionality."""
274284
client = AsyncDstackClient()
275-
# This should not raise an error
276-
await client.emit_event("test-event", "test payload")
277-
await client.emit_event("test-event-bytes", b"test payload bytes")
285+
error = None
286+
try:
287+
await client.emit_event("test-event", "test payload")
288+
await client.emit_event("test-event-bytes", b"test payload bytes")
289+
except Exception as exc: # pragma: no cover - behavior depends on runtime mode
290+
error = exc
291+
assert_emit_event_behavior(error)
278292

279293

280294
def test_sync_emit_event():
281295
"""Test sync emit event functionality."""
282296
client = DstackClient()
283-
# This should not raise an error
284-
client.emit_event("test-event", "test payload")
285-
client.emit_event("test-event-bytes", b"test payload bytes")
297+
error = None
298+
try:
299+
client.emit_event("test-event", "test payload")
300+
client.emit_event("test-event-bytes", b"test payload bytes")
301+
except Exception as exc: # pragma: no cover - behavior depends on runtime mode
302+
error = exc
303+
assert_emit_event_behavior(error)
286304

287305

288306
def test_emit_event_validation():

0 commit comments

Comments
 (0)