@@ -27,60 +27,102 @@ type Worker struct {
2727 labels string
2828 pollInterval time.Duration
2929 uuid string
30+ slotCleanup func ()
3031 log * logrus.Entry
3132
3233 metricsMu sync.Mutex
3334 lastCounters * RawCounters
3435}
3536
3637func NewWorker (client * Client , docker DockerConfig , vcsOpts vcs.Options , labels string ) * Worker {
38+ uuid , cleanup := acquireAgentSlot ()
39+ client .SetUUID (uuid )
3740 return & Worker {
3841 client : client ,
3942 docker : docker ,
4043 vcsOpts : vcsOpts ,
4144 labels : labels ,
4245 pollInterval : 1 * time .Second ,
43- uuid : loadOrGenerateUUID (),
46+ uuid : uuid ,
47+ slotCleanup : cleanup ,
4448 log : logrus .WithField ("component" , "agent" ),
4549 }
4650}
4751
48- // uuidFilePath returns the path to the persistent UUID file in the user's config directory .
49- func uuidFilePath () string {
52+ // agentSlotDir returns the directory for agent UUID slot files .
53+ func agentSlotDir () string {
5054 dir , err := os .UserConfigDir ()
5155 if err != nil {
5256 dir = os .TempDir ()
5357 }
54- return filepath .Join (dir , "actionforge" , "agent-uuid" )
58+ return filepath .Join (dir , "actionforge" )
5559}
5660
57- // loadOrGenerateUUID loads a persistent UUID from disk, or generates and saves a new one.
58- func loadOrGenerateUUID () string {
59- path := uuidFilePath ()
60- if data , err := os .ReadFile (path ); err == nil {
61- if id := strings .TrimSpace (string (data )); len (id ) == 36 {
62- return id
61+ // acquireAgentSlot finds and locks the lowest available agent slot.
62+ // Each slot has a persistent UUID file and a lock file. When the process
63+ // exits, the lock is released so the next process can reuse that slot
64+ // (and its UUID/metrics history).
65+ // Returns the UUID and a cleanup function that releases the lock.
66+ func acquireAgentSlot () (string , func ()) {
67+ dir := agentSlotDir ()
68+ _ = os .MkdirAll (dir , 0700 )
69+
70+ const maxSlots = 256
71+ for i := 0 ; i < maxSlots ; i ++ {
72+ lockPath := filepath .Join (dir , fmt .Sprintf ("agent-%d.lock" , i ))
73+ uuidPath := filepath .Join (dir , fmt .Sprintf ("agent-%d.uuid" , i ))
74+
75+ lockFile , err := os .OpenFile (lockPath , os .O_CREATE | os .O_RDWR , 0600 )
76+ if err != nil {
77+ continue
78+ }
79+
80+ if err := lockFileExclusive (lockFile ); err != nil {
81+ lockFile .Close ()
82+ continue
83+ }
84+
85+ // Slot acquired — read or generate UUID
86+ uuid := ""
87+ if data , err := os .ReadFile (uuidPath ); err == nil {
88+ if id := strings .TrimSpace (string (data )); len (id ) == 36 {
89+ uuid = id
90+ }
91+ }
92+ if uuid == "" {
93+ var buf [16 ]byte
94+ _ , _ = rand .Read (buf [:])
95+ buf [6 ] = (buf [6 ] & 0x0f ) | 0x40 // version 4
96+ buf [8 ] = (buf [8 ] & 0x3f ) | 0x80 // variant 1
97+ uuid = fmt .Sprintf ("%08x-%04x-%04x-%04x-%012x" ,
98+ buf [0 :4 ], buf [4 :6 ], buf [6 :8 ], buf [8 :10 ], buf [10 :16 ])
99+ _ = os .WriteFile (uuidPath , []byte (uuid + "\n " ), 0600 )
63100 }
101+
102+ cleanup := func () {
103+ unlockFile (lockFile )
104+ lockFile .Close ()
105+ }
106+ return uuid , cleanup
64107 }
65108
66- // Generate UUID v4
109+ // Fallback: all slots taken, generate ephemeral UUID with no lock
67110 var buf [16 ]byte
68111 _ , _ = rand .Read (buf [:])
69- buf [6 ] = (buf [6 ] & 0x0f ) | 0x40 // version 4
70- buf [8 ] = (buf [8 ] & 0x3f ) | 0x80 // variant 1
71- id := fmt .Sprintf ("%08x-%04x-%04x-%04x-%012x" ,
72- buf [0 :4 ], buf [4 :6 ], buf [6 :8 ], buf [8 :10 ], buf [10 :16 ])
73-
74- _ = os .MkdirAll (filepath .Dir (path ), 0700 )
75- _ = os .WriteFile (path , []byte (id + "\n " ), 0600 )
76- return id
112+ buf [6 ] = (buf [6 ] & 0x0f ) | 0x40
113+ buf [8 ] = (buf [8 ] & 0x3f ) | 0x80
114+ return fmt .Sprintf ("%08x-%04x-%04x-%04x-%012x" ,
115+ buf [0 :4 ], buf [4 :6 ], buf [6 :8 ], buf [8 :10 ], buf [10 :16 ]), func () {}
77116}
78117
79118// maxConsecutiveErrors is the number of consecutive connection errors before
80119// Run returns ErrConnectionLost so the caller can decide to restart.
81120const maxConsecutiveErrors = 10
82121
83122func (w * Worker ) Run (ctx context.Context ) error {
123+ if w .slotCleanup != nil {
124+ defer w .slotCleanup ()
125+ }
84126 w .log .Info ("starting" )
85127
86128 // Take initial snapshot for delta computation
0 commit comments