Skip to content

Commit 3bbd64f

Browse files
committed
Track agent instances via persistent UUID slots
Each agent process acquires a unique file-locked slot with a persistent UUID. The client sends X-Agent-UUID on all runner API calls so the server can distinguish multiple instances sharing the same token.
1 parent 18a90c9 commit 3bbd64f

4 files changed

Lines changed: 130 additions & 19 deletions

File tree

agent/client.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
type Client struct {
1313
serverURL string
1414
token string
15+
uuid string
1516
httpClient *http.Client
1617
}
1718

@@ -25,6 +26,10 @@ func NewClient(serverURL, token string) *Client {
2526
}
2627
}
2728

29+
func (c *Client) SetUUID(uuid string) {
30+
c.uuid = uuid
31+
}
32+
2833
func (c *Client) doRequest(method, path string, body interface{}) (*http.Response, error) {
2934
var bodyReader io.Reader
3035
if body != nil {
@@ -40,6 +45,9 @@ func (c *Client) doRequest(method, path string, body interface{}) (*http.Respons
4045
return nil, err
4146
}
4247
req.Header.Set("Authorization", "Bearer "+c.token)
48+
if c.uuid != "" {
49+
req.Header.Set("X-Agent-UUID", c.uuid)
50+
}
4351
if body != nil {
4452
req.Header.Set("Content-Type", "application/json")
4553
}

agent/flock_unix.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//go:build !windows
2+
3+
package agent
4+
5+
import (
6+
"os"
7+
"syscall"
8+
)
9+
10+
func lockFileExclusive(f *os.File) error {
11+
return syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
12+
}
13+
14+
func unlockFile(f *os.File) {
15+
_ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
16+
}

agent/flock_windows.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//go:build windows
2+
3+
package agent
4+
5+
import (
6+
"os"
7+
"syscall"
8+
"unsafe"
9+
)
10+
11+
var (
12+
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
13+
procLockFileEx = modkernel32.NewProc("LockFileEx")
14+
procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
15+
)
16+
17+
const (
18+
lockfileExclusiveLock = 0x00000002
19+
lockfileFailImmediately = 0x00000001
20+
)
21+
22+
func lockFileExclusive(f *os.File) error {
23+
var overlapped syscall.Overlapped
24+
r1, _, err := procLockFileEx.Call(
25+
uintptr(f.Fd()),
26+
uintptr(lockfileExclusiveLock|lockfileFailImmediately),
27+
0,
28+
1, 0,
29+
uintptr(unsafe.Pointer(&overlapped)),
30+
)
31+
if r1 == 0 {
32+
return err
33+
}
34+
return nil
35+
}
36+
37+
func unlockFile(f *os.File) {
38+
var overlapped syscall.Overlapped
39+
procUnlockFileEx.Call(
40+
uintptr(f.Fd()),
41+
0,
42+
1, 0,
43+
uintptr(unsafe.Pointer(&overlapped)),
44+
)
45+
}

agent/worker.go

Lines changed: 61 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,60 +27,102 @@ type Worker struct {
2727
labels string
2828
pollInterval time.Duration
2929
uuid string
30+
slotCleanup func()
3031
log *logrus.Entry
3132

3233
metricsMu sync.Mutex
3334
lastCounters *RawCounters
3435
}
3536

3637
func NewWorker(client *Client, docker DockerConfig, vcsOpts vcs.Options, labels string) *Worker {
38+
uuid, cleanup := acquireAgentSlot()
39+
client.SetUUID(uuid)
3740
return &Worker{
3841
client: client,
3942
docker: docker,
4043
vcsOpts: vcsOpts,
4144
labels: labels,
4245
pollInterval: 1 * time.Second,
43-
uuid: loadOrGenerateUUID(),
46+
uuid: uuid,
47+
slotCleanup: cleanup,
4448
log: logrus.WithField("component", "agent"),
4549
}
4650
}
4751

48-
// uuidFilePath returns the path to the persistent UUID file in the user's config directory.
49-
func uuidFilePath() string {
52+
// agentSlotDir returns the directory for agent UUID slot files.
53+
func agentSlotDir() string {
5054
dir, err := os.UserConfigDir()
5155
if err != nil {
5256
dir = os.TempDir()
5357
}
54-
return filepath.Join(dir, "actionforge", "agent-uuid")
58+
return filepath.Join(dir, "actionforge")
5559
}
5660

57-
// loadOrGenerateUUID loads a persistent UUID from disk, or generates and saves a new one.
58-
func loadOrGenerateUUID() string {
59-
path := uuidFilePath()
60-
if data, err := os.ReadFile(path); err == nil {
61-
if id := strings.TrimSpace(string(data)); len(id) == 36 {
62-
return id
61+
// acquireAgentSlot finds and locks the lowest available agent slot.
62+
// Each slot has a persistent UUID file and a lock file. When the process
63+
// exits, the lock is released so the next process can reuse that slot
64+
// (and its UUID/metrics history).
65+
// Returns the UUID and a cleanup function that releases the lock.
66+
func acquireAgentSlot() (string, func()) {
67+
dir := agentSlotDir()
68+
_ = os.MkdirAll(dir, 0700)
69+
70+
const maxSlots = 256
71+
for i := 0; i < maxSlots; i++ {
72+
lockPath := filepath.Join(dir, fmt.Sprintf("agent-%d.lock", i))
73+
uuidPath := filepath.Join(dir, fmt.Sprintf("agent-%d.uuid", i))
74+
75+
lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0600)
76+
if err != nil {
77+
continue
78+
}
79+
80+
if err := lockFileExclusive(lockFile); err != nil {
81+
lockFile.Close()
82+
continue
83+
}
84+
85+
// Slot acquired — read or generate UUID
86+
uuid := ""
87+
if data, err := os.ReadFile(uuidPath); err == nil {
88+
if id := strings.TrimSpace(string(data)); len(id) == 36 {
89+
uuid = id
90+
}
91+
}
92+
if uuid == "" {
93+
var buf [16]byte
94+
_, _ = rand.Read(buf[:])
95+
buf[6] = (buf[6] & 0x0f) | 0x40 // version 4
96+
buf[8] = (buf[8] & 0x3f) | 0x80 // variant 1
97+
uuid = fmt.Sprintf("%08x-%04x-%04x-%04x-%012x",
98+
buf[0:4], buf[4:6], buf[6:8], buf[8:10], buf[10:16])
99+
_ = os.WriteFile(uuidPath, []byte(uuid+"\n"), 0600)
63100
}
101+
102+
cleanup := func() {
103+
unlockFile(lockFile)
104+
lockFile.Close()
105+
}
106+
return uuid, cleanup
64107
}
65108

66-
// Generate UUID v4
109+
// Fallback: all slots taken, generate ephemeral UUID with no lock
67110
var buf [16]byte
68111
_, _ = rand.Read(buf[:])
69-
buf[6] = (buf[6] & 0x0f) | 0x40 // version 4
70-
buf[8] = (buf[8] & 0x3f) | 0x80 // variant 1
71-
id := fmt.Sprintf("%08x-%04x-%04x-%04x-%012x",
72-
buf[0:4], buf[4:6], buf[6:8], buf[8:10], buf[10:16])
73-
74-
_ = os.MkdirAll(filepath.Dir(path), 0700)
75-
_ = os.WriteFile(path, []byte(id+"\n"), 0600)
76-
return id
112+
buf[6] = (buf[6] & 0x0f) | 0x40
113+
buf[8] = (buf[8] & 0x3f) | 0x80
114+
return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x",
115+
buf[0:4], buf[4:6], buf[6:8], buf[8:10], buf[10:16]), func() {}
77116
}
78117

79118
// maxConsecutiveErrors is the number of consecutive connection errors before
80119
// Run returns ErrConnectionLost so the caller can decide to restart.
81120
const maxConsecutiveErrors = 10
82121

83122
func (w *Worker) Run(ctx context.Context) error {
123+
if w.slotCleanup != nil {
124+
defer w.slotCleanup()
125+
}
84126
w.log.Info("starting")
85127

86128
// Take initial snapshot for delta computation

0 commit comments

Comments
 (0)