Skip to content

Commit 0cb25a9

Browse files
committed
fix: work around a xonsh bug where terminating a subprocess that was not started in a separate shell causes any later subprocess to fail silently
1 parent 0381a77 commit 0cb25a9

2 files changed

Lines changed: 20 additions & 17 deletions

File tree

.github/workflows/eval-overhead-e2e.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
benchmark:
2626
runs-on: self-hosted
2727

28-
name: Run pytest-benchmark benchmark example
28+
name: Run Overhead Benchmarks
2929
steps:
3030
- uses: actions/checkout@v4
3131
- uses: actions/setup-python@v5

eval_scripts/perf_benchmark/run_all.xsh

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
1+
import argparse
12
import os
3+
import signal
24
import subprocess
35

4-
import argparse
6+
# configs
7+
$RAISE_SUBPROC_ERROR = True
8+
os.environ["PYTHONUNBUFFERED"] = "1"
59

610
parser = argparse.ArgumentParser()
711
parser.add_argument("--res_folder", type=str, required=False)
812
args = parser.parse_args()
913

10-
# configs
11-
$RAISE_SUBPROC_ERROR = True
12-
$XONSH_SHOW_TRACEBACK = True
13-
os.environ["PYTHONUNBUFFERED"] = "1"
14-
1514
SELC_INV_FILE = "sampled_100_invariants.json"
1615
COMMIT = $(git rev-parse --short HEAD).strip()
1716

@@ -35,16 +34,17 @@ mv @(MICRO_FOLDER)/wrapper_overhead_micro.csv @(RES_FOLDER)/
3534

3635
def run_cmd(cmd: str, kill_sec: int):
3736
with open("cmd_output.log", "w") as f:
38-
p = subprocess.Popen(cmd.split(), stdout=f, stderr=f)
37+
p = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f)
3938
try:
4039
output, _ = p.communicate(timeout=kill_sec)
4140
except subprocess.TimeoutExpired:
4241
print(f"Timeout: {kill_sec} seconds, killing the process")
43-
p.terminate()
44-
try:
45-
p.wait(timeout=5)
46-
except subprocess.TimeoutExpired:
47-
p.kill()
42+
# os.kill(
43+
# p.pid, signal.SIGTERM
44+
# ) # send SIGTERM to the process group NOTE: the signal will be delivered here again
45+
# p.kill()
46+
p.terminate() # sends SIGTERM
47+
print("Killed the running process...")
4848

4949
# run e2e benchmark
5050
def run_exp(kill_sec: int = 100, workload: str = "mnist"):
@@ -63,7 +63,7 @@ def run_exp(kill_sec: int = 100, workload: str = "mnist"):
6363

6464
cd f"{E2E_FOLDER}/{workload}"
6565

66-
# run five setups
66+
# run four setups
6767

6868
# 1. naive running
6969
print("Running naive setup")
@@ -79,11 +79,14 @@ def run_exp(kill_sec: int = 100, workload: str = "mnist"):
7979
rm iteration_times.txt
8080

8181
# 3. traincheck proxy instrumentation
82-
print("Running traincheck proxy instrumentation")
83-
run_cmd(CMD_TRAINCHECK, 30)
82+
print("Running traincheck instrumentation")
83+
run_cmd(CMD_TRAINCHECK, kill_sec)
84+
print("Trying to copy")
85+
print(os.listdir("traincheck"))
86+
# shutil.copy("traincheck/iteration_times.txt", f"../../{RES_FOLDER}/e2e_{workload}_monkey-patch.txt")
8487
cp traincheck/iteration_times.txt @(f"../../{RES_FOLDER}/e2e_{workload}_monkey-patch.txt")
88+
print("Copied")
8589
rm -rf traincheck
86-
# rm iteration_times.txt
8790

8891
# 4. traincheck selective instrumentation
8992
print("Running traincheck selective instrumentation")

0 commit comments

Comments
 (0)