@@ -270,7 +270,23 @@ func (cs *CloningService) CloneTemplate(req CloneRequest) error {
270270 // Release the vmid allocation mutex now that all of the VMs are cloned on proxmox
271271 cs .vmidMutex .Unlock ()
272272
273- // 9. Configure VNet of all VMs
273+ // 9. Wait for all router disks to be fully available before configuring VNets.
274+ // Proxmox clone is two-phase: the clone lock (Phase 1) releases before the storage
275+ // backend finishes writing the disk (Phase 2). If SetPodVnet runs before Phase 2
276+ // completes, Proxmox's disk finalization can overwrite the net1 config change,
277+ // leaving the router connected to the wrong vnet.
278+ log .Printf ("Waiting for router disks to be available before configuring VNets" )
279+ routerDiskReady := make (map [int ]bool )
280+ for _ , routerInfo := range clonedRouters {
281+ log .Printf ("Waiting for router disk to be available for %s (VMID: %d)" , routerInfo .TargetName , routerInfo .VMID )
282+ if err := cs .ProxmoxService .WaitForDisk (routerInfo .Node , routerInfo .VMID , cs .Config .RouterWaitTimeout ); err != nil {
283+ errors = append (errors , fmt .Sprintf ("router disk unavailable for %s: %v" , routerInfo .TargetName , err ))
284+ } else {
285+ routerDiskReady [routerInfo .VMID ] = true
286+ }
287+ }
288+
289+ // 10. Configure VNet of all VMs
274290 log .Printf ("Configuring VNets for %d targets" , len (req .Targets ))
275291 for _ , target := range req .Targets {
276292 vnetName := fmt .Sprintf ("kamino%d" , target .PodNumber )
@@ -281,7 +297,7 @@ func (cs *CloningService) CloneTemplate(req CloneRequest) error {
281297 }
282298 }
283299
284- // 10. Start all routers and wait for them to be running
300+ // 11. Start all routers and wait for them to be running
285301 req .SSE .Send (
286302 ProgressMessage {
287303 Message : "Starting routers" ,
@@ -290,11 +306,7 @@ func (cs *CloningService) CloneTemplate(req CloneRequest) error {
290306 )
291307 log .Printf ("Starting %d routers" , len (clonedRouters ))
292308 for _ , routerInfo := range clonedRouters {
293- // Wait for router disk to be available
294- log .Printf ("Waiting for router disk to be available for %s (VMID: %d)" , routerInfo .TargetName , routerInfo .VMID )
295- err = cs .ProxmoxService .WaitForDisk (routerInfo .Node , routerInfo .VMID , cs .Config .RouterWaitTimeout )
296- if err != nil {
297- errors = append (errors , fmt .Sprintf ("router disk unavailable for %s: %v" , routerInfo .TargetName , err ))
309+ if ! routerDiskReady [routerInfo .VMID ] {
298310 continue
299311 }
300312
@@ -314,7 +326,7 @@ func (cs *CloningService) CloneTemplate(req CloneRequest) error {
314326 }
315327 }
316328
317- // 11. Configure all pod routers (separate step after all routers are running)
329+ // 12. Configure all pod routers (separate step after all routers are running)
318330 req .SSE .Send (
319331 ProgressMessage {
320332 Message : "Configuring pod routers" ,
0 commit comments