@@ -526,6 +526,99 @@ def test_aba_same_value_no_spurious_suppress_e2e():
526526 assert len (races ) > 0 # ambiguous reads-from → race
527527
528528
529+ def test_producer_consumer_release_xchg_acquire_cas_same_value_poll_no_race ():
530+ """Acquire CAS polling with same-value writeback must not block sw edge.
531+
532+ B0 stores data + release xchg(1); B1 acquire cas(1,1) reads old=1 + writes back 1.
533+ Reader's own writeback must be excluded from the ambiguity scan.
534+ """
535+ DATA_ADDR = 100
536+ FLAG_ADDR = 200
537+
538+ accesses = [
539+ # Block 0 (producer): store data, then release xchg writes 1
540+ _store (block_idx = 0 , offset = DATA_ADDR , event_id = 0 ),
541+ _atomic (
542+ block_idx = 0 ,
543+ offset = FLAG_ADDR ,
544+ event_id = 1 ,
545+ atomic_op = "rmw:xchg" ,
546+ sem = "release" ,
547+ scope = "gpu" ,
548+ atomic_val = np .array ([1 ], dtype = np .int32 ),
549+ atomic_old = np .array ([0 ], dtype = np .int32 ),
550+ ),
551+ # Block 1 (consumer): acquire cas(1,1) reads old=1, writes back 1
552+ _atomic (
553+ block_idx = 1 ,
554+ offset = FLAG_ADDR ,
555+ event_id = 0 ,
556+ atomic_op = "cas" ,
557+ sem = "acquire" ,
558+ scope = "gpu" ,
559+ atomic_cmp = np .array ([1 ], dtype = np .int32 ),
560+ atomic_val = np .array ([1 ], dtype = np .int32 ),
561+ atomic_old = np .array ([1 ], dtype = np .int32 ),
562+ ),
563+ _load (block_idx = 1 , offset = DATA_ADDR , event_id = 1 ),
564+ ]
565+
566+ races = detect_races (accesses )
567+ assert len (races ) == 0
568+
569+
570+ def test_acquire_cas_same_value_poll_still_races_with_third_party_same_value_writer ():
571+ """Third-party same-value writer makes reads-from ambiguous despite CAS polling.
572+
573+ B0 release xchg(1), B2 relaxed xchg(1), B1 acquire cas(1,1) reads old=1.
574+ Third-party writer means we can't tell who wrote the 1 → ambiguous → race.
575+ """
576+ DATA_ADDR = 100
577+ FLAG_ADDR = 200
578+
579+ accesses = [
580+ # Block 0 (producer): store data, then release xchg writes 1
581+ _store (block_idx = 0 , offset = DATA_ADDR , event_id = 0 ),
582+ _atomic (
583+ block_idx = 0 ,
584+ offset = FLAG_ADDR ,
585+ event_id = 1 ,
586+ atomic_op = "rmw:xchg" ,
587+ sem = "release" ,
588+ scope = "gpu" ,
589+ atomic_val = np .array ([1 ], dtype = np .int32 ),
590+ atomic_old = np .array ([0 ], dtype = np .int32 ),
591+ ),
592+ # Block 2 (interloper): relaxed xchg also writes 1
593+ _atomic (
594+ block_idx = 2 ,
595+ offset = FLAG_ADDR ,
596+ event_id = 0 ,
597+ atomic_op = "rmw:xchg" ,
598+ sem = "relaxed" ,
599+ scope = "gpu" ,
600+ atomic_val = np .array ([1 ], dtype = np .int32 ),
601+ atomic_old = np .array ([0 ], dtype = np .int32 ),
602+ ),
603+ # Block 1 (consumer): acquire cas(1,1) reads old=1, writes back 1
604+ _atomic (
605+ block_idx = 1 ,
606+ offset = FLAG_ADDR ,
607+ event_id = 0 ,
608+ atomic_op = "cas" ,
609+ sem = "acquire" ,
610+ scope = "gpu" ,
611+ atomic_cmp = np .array ([1 ], dtype = np .int32 ),
612+ atomic_val = np .array ([1 ], dtype = np .int32 ),
613+ atomic_old = np .array ([1 ], dtype = np .int32 ),
614+ ),
615+ _load (block_idx = 1 , offset = DATA_ADDR , event_id = 1 ),
616+ ]
617+
618+ races = detect_races (accesses )
619+ assert len (races ) > 0
620+
621+
529622def test_add_written_value_correctness_e2e ():
530623 """add(5) with old=10 writes 15; acquire reads old=5 (operand) → no reads-from → race.
531624
0 commit comments