mlcommons
diff --git a/‎benchmark/MLPerfTiny_Rules.adoc‎
Lines changed: 10 additions & 2 deletions b/‎benchmark/MLPerfTiny_Rules.adoc‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎benchmark/runner/README.md‎
Lines changed: 25 additions & 0 deletions b/‎benchmark/runner/README.md‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎benchmark/runner/device_manager.py‎
Lines changed: 8 additions & 6 deletions b/‎benchmark/runner/device_manager.py‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎benchmark/runner/device_under_test.py‎
Lines changed: 1 addition & 0 deletions b/‎benchmark/runner/device_under_test.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmark/runner/devices_ad.yaml‎
Lines changed: 1 addition & 0 deletions b/‎benchmark/runner/devices_ad.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmark/runner/devices_kws_ic_vww.yaml‎
Lines changed: 1 addition & 0 deletions b/‎benchmark/runner/devices_kws_ic_vww.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmark/runner/devices_sww.yaml‎
Lines changed: 1 addition & 0 deletions b/‎benchmark/runner/devices_sww.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmark/runner/img/L4R5Zi.png‎
240 KB b/‎benchmark/runner/img/L4R5Zi.png‎
240 KB
diff --git a/‎benchmark/runner/main.py‎
Lines changed: 14 additions & 5 deletions b/‎benchmark/runner/main.py‎
Lines changed: 14 additions & 5 deletions
@@ -177,9 +177,16 @@ The suite includes the following benchmarks:
 |   Visual Wake Words  |           Binary image classification           | Visual Wake Words Dataset |     MobileNet    |   80% (Top 1)
 | Image Classification |            Small image classification           |          Cifar10          |      ResNet      |   85% (Top 1)
 |   Anomaly Detection  | Detecting anomalies in machine operating sounds |          ToyADMOS         | Deep AutoEncoder |   0.85 (AUC)
-|   Streaming Wakeword | Detecting wakewords in a continuous stream of audio| Custom         | 1D DS-CNN |  TBD
+|   Streaming Wakeword | Detecting wakewords in a continuous stream of audio| Custom                 | 1D DS-CNN        | <= 8 FP, <= 8 FN
 |===
 
+
+For the quality target, keyword spotting, visual wakewords, and image classification all use top-1 accuracy as the key metric.  Anomaly detection
+uses the area under the ROC curve (true positive rate vs false positive rate), as computed by
+https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html[sklearn.metrics.roc_auc_score].
+
+The streaming wakeword benchmark uses a combination of false positives and false negatives, requiring no more than 8 of either.
+
 ==== Relaxed constraints for the Open division
 
 1. An Open benchmark must perform a task matching an existing Closed benchmark, and be substitutable in LoadGen for that benchmark.
@@ -193,7 +200,8 @@ The suite includes the following benchmarks:
 
 
 === EnergyRunner™ benchmark framework
-The benchmark suite is run using the EnergyRunner™ benchmark framework from EEMBC, which detects the DUT, sends inputs, and reads outputs over UART.  The EEMBC runner is being phased out.  It will be permitted for teh KWS, VWW, IC, and AD benchmarks in the summer 2015 submission.  After that, only the MLCommons Runner will be permitted.  The EEMBC runner does not support the streaming wakeword benchmark.
+
+The benchmark suite is run using the EnergyRunner™ benchmark framework from EEMBC, which detects the DUT, sends inputs, and reads outputs over UART.  The EEMBC runner is being phased out.  It will be permitted for the KWS, VWW, IC, and AD benchmarks in the summer 2025 submission.  After that, only the MLCommons Runner will be permitted.  The EEMBC runner does not support the streaming wakeword benchmark.
 
 The EEMBC runner is available here: https://github.com/eembc/energyrunner
 The MLCommons runner is available in this repository: https://github.com/mlcommons/tiny/tree/master/benchmark/runner
 
@@ -164,6 +164,25 @@ The device file defines available devices that are automatically detected by the
 - **`usb`**: `dict` where the key is `vid` and the value is a `pid` or a list of `pid`s.
 - **`usb_description`**: A string used to match the USB description.
 
+
+#### Adding a New Device
+You can use the PySerial module's `list_ports` function to get the VID and PID of a device, as long as it presents as a serial interface:
+```
+jeremy@macbook-pro-16%>python -m serial.tools.list_ports -v
+/dev/cu.Bluetooth-Incoming-Port
+    desc: n/a
+    hwid: n/a
+/dev/cu.usbmodem1403 <<==== This is the reference DUT 
+    desc: STLINK-V3
+    hwid: USB VID:PID=0483:374E SER=005300313532511531333430 LOCATION=0-1.4
+/dev/cu.usbmodem2061398A4D431  <<==== This is the LPM05a power monitor
+    desc: PowerShield (Virtual ComPort in FS Mode)
+    hwid: USB VID:PID=0483:5740 SER=2061398A4D43 LOCATION=1-1
+/dev/cu.wlan-debug  
+    desc: n/a
+    hwid: n/a
+4 ports found
+```
 ---
 
 ### Device Under Test Configuration `dut.yml`
@@ -272,3 +291,9 @@ If the I2S transfer appears not to be working, here are a few things to try.
 ### Baud Rate for Interface board:
 Located in file /application/user/core/usart.c
 
+
+### A device with vid:pid XX:YY failed to provide a serial number.
+
+In some cases, multiple devices may have the same VID and PID.  For example, on an MCU development board, the VID/PID may be linked to the vendor's debugger/programmer (e.g. ST-Link) rather than to the development board specifically.  To avoid ambiguity between such devices, the runner needs each of them to report a USB serial number.
+
+Workaround:  Use a USB-serial converter so that the offending device presents with a different VID:PID.
@@ -17,7 +17,7 @@ def precheck_device_name(dev_cfg, serial_device, mode):
     return True.  If the device on <serial_device> does not respond to the
     "name%" command, or responds but the name does not match check_name return
     False. If the response matches check_name, return True.
-    Note that this function uses teh 'check_name' property, not 'name', which
+    Note that this function uses the 'check_name' property, not 'name', which
     is mostly arbitrary
     ** Arguments:
     - dev_cfg: device configuration dict from devices.yaml
@@ -129,10 +129,8 @@ def scan(self):
         """Scan for both serial and USB-only devices and initialize them."""
         pending_serial = [p for p in list_ports.comports(True) if p.vid]
         matched = []
-        comport_serial_numbers = []
 
         for p in pending_serial:
-            comport_serial_numbers.append(p.serial_number)
             for d in self._device_defs:
                 found = False
                 for vid, pids in d.get("usb", {}).items():
@@ -154,12 +152,16 @@ def scan(self):
         # Additional scan for USB-only devices (non-serial)
         all_usb = usb.core.find(find_all=True)
         for dev in all_usb:
-            if dev.serial_number in comport_serial_numbers:
-                # we already handled this device in the loop on list_ports.comports()
-                continue
             vid = dev.idVendor
             pid = dev.idProduct
+
             for d in self._device_defs:
+                if d.get("interface", "") != "direct_usb":
+                    # this association logic is only for direct (non-serial) devices, like the JS-220.
+                    # so skip it if interface is unspecified or not "direct_usb"
+                    # Without this block, a VID/PID match that has been previously rejected based on
+                    # "name" mismatch can be incorrectly associated here.
+                    continue
                 for k, v in d.get("usb", {}).items():
                     if isinstance(v, list):
                         if pid in v and vid == k:
 
@@ -40,6 +40,7 @@ def _retry(self, method, retries=3):
 
   def _get_name(self):
     name_retrieved = False
+    print("Retrieving name from DUT ...")
     for l in self._port.send_command("name"):
       match = re.match(r'^m-(name)-dut-\[([^]]+)]$', l)
       if match:
 
@@ -33,6 +33,7 @@
     0x0483: 0x374B
 - name: js220
   type: power
+  interface: direct_usb
   preference: 1 # set to higher preference than lpm01a to use js220
   raw_sampling_rate: 1000000
   virtual_sampling_rate: 1000
 
@@ -33,6 +33,7 @@
     0x0483: 0x374B
 - name: js220
   type: power
+  interface: direct_usb
   preference: 1 # set to higher preference than lpm01a to use js220
   raw_sampling_rate: 1000000
   virtual_sampling_rate: 1000
 
@@ -33,6 +33,7 @@
     0x0483: 0x374B
 - name: js220
   type: power
+  interface: direct_usb
   preference: 1 # set to higher preference than lpm01a to use js220
   raw_sampling_rate: 1000000
   virtual_sampling_rate: 1000
 
@@ -203,7 +203,6 @@ def print_energy_results(l_results, energy_sampling_freq=1000, req_cycles=5, res
         total_inference_energy = np.sum(inference_energy_samples)
         num_inferences = res['infer']['iterations']
         energy_per_inf = total_inference_energy / num_inferences
-        latency_per_inf = elapsed_time / num_inferences
         inf_energies[inf_num] = energy_per_inf
         inf_times[inf_num] = elapsed_time
 
@@ -226,6 +225,7 @@ def print_energy_results(l_results, energy_sampling_freq=1000, req_cycles=5, res
 
 # Summarize results
 def summarize_result(result, power, mode, results_file=None):
+    print(20*'-')
     num_correct_files = 0
     total_files = 0
     y_pred = []
@@ -252,7 +252,7 @@ def summarize_result(result, power, mode, results_file=None):
         print_energy_results(result, energy_sampling_freq=1000, results_file=results_file)
         return
 
-    for r in result:
+    for res_num,r in enumerate(result):
         if 'infer' not in r or 'class' not in r or 'file' not in r:
             continue  # Skip malformed or error-only entries
         infer_data = r['infer']
@@ -266,7 +266,13 @@ def summarize_result(result, power, mode, results_file=None):
 
         if 'throughput' in infer_data:
             throughput_values.append(infer_data['throughput'])
-
+            print_tee(f"Performance results for window {res_num+1}", outfile=results_file)
+            print_tee(f"    # Inferences : {infer_data['iterations']}", outfile=results_file)
+            print_tee(f"    Runtime: {infer_data['elapsed_time']/1e6} sec.", outfile=results_file)
+            print_tee(f"    Throughput: {infer_data['throughput']} inf./sec.", outfile=results_file)
+            if infer_data['elapsed_time']/1e6 > 10.0:
+                print_tee(f"    Runtime requirements have been met.", outfile=results_file)
+             
         if file_name not in file_infer_results:
             file_infer_results[file_name] = {'true_class': true_class, 'results': []}
 
@@ -307,8 +313,11 @@ def summarize_result(result, power, mode, results_file=None):
             total_files += 1
 
         accuracy = calculate_accuracy(np.array(y_pred), np.array(y_true))
-        auc = roc_auc_score(np.array(y_true), np.array(y_pred), multi_class='ovr')
-        
+
+        if np.array(y_pred).shape[1] == 2:
+            auc =roc_auc_score(np.array(y_true), np.array(y_pred)[:,1])
+        else:
+            auc =roc_auc_score(np.array(y_true), np.array(y_pred), multi_class='ovr')
 
         current_time = datetime.now()
         formatted_time = current_time.strftime("%m%d.%H%M%S ")