From b89a79f21eac342d7ac1dacd20529831abb23079 Mon Sep 17 00:00:00 2001
From: Jiannan Wang <jiannanwang@meta.com>
Date: Thu, 7 May 2026 16:03:21 -0700
Subject: [PATCH] Fix NCU CSV NaN parser crash on empty unit cells
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

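The unit-row detection in ``load_ncu_metrics`` reads the first CSV row,
casts it with ``astype(str)``, lowercases each cell, and checks whether
any cell contains a unit-marker substring. When a missing cell survives
the cast (newer pandas string dtypes preserve missing values instead of
stringifying them to ``"nan"``), ``Series.str.lower()`` passes the
``pd.NA`` / ``NaN`` through as float NaN, and the subsequent
``any(tok in x for tok in unit_tokens)`` then raises::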

    TypeError: argument of type 'float' is not iterable

This surfaces on real NCU outputs (e.g. profiles from the
``gemma3_swiglu`` benchmark in SOL-ExecBench) and aborts the entire
profiling step. The failure propagates up to the worker as
``Profiling failed``, so the round produces 0 successful kernels.

Fix: chain ``.fillna("")`` after ``.str.lower()`` so NaN cells become
empty strings before the substring check. The matching logic itself is
unchanged — empty strings legitimately don't contain any of the unit
tokens.

Test plan:
- ``pytest tests/`` (existing suite passes)
- No longer reproduces: running the optimizer against
  ``ka-review-gate-runs/gemma3_swiglu`` previously hit the TypeError on
  every worker's first NCU profile; with the patch the round produces
  successful kernels.
---
 kernel_perf_agent/kernel_opt/profiler/ncu_profiler.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel_perf_agent/kernel_opt/profiler/ncu_profiler.py b/kernel_perf_agent/kernel_opt/profiler/ncu_profiler.py
index 350c1e1..69a3a69 100644
--- a/kernel_perf_agent/kernel_opt/profiler/ncu_profiler.py
+++ b/kernel_perf_agent/kernel_opt/profiler/ncu_profiler.py
@@ -361,7 +361,10 @@ def load_ncu_metrics(
 
     # Drop the units row (first row often contains units like "%", "inst", etc.)
     if len(sub) > 0:
-        first_row_str = sub.iloc[0].astype(str).str.lower()
+        # ``.str.lower()`` propagates NaN/pd.NA cells back to float NaN, which
+        # would crash the ``tok in x`` substring check below.  ``fillna("")``
+        # makes the detection NaN-safe without changing the matching logic.
+        first_row_str = sub.iloc[0].astype(str).str.lower().fillna("")
         unit_tokens = ("%", "inst", "cycle", "block", "register", "register/thread")
         if first_row_str.apply(lambda x: any(tok in x for tok in unit_tokens)).any():
             sub = sub.iloc[1:].reset_index(drop=True)