From 234ec1524df2b95bd635c40554ca6eb87c5d602b Mon Sep 17 00:00:00 2001
From: munechika-koyo <munechika.koyo@gmail.com>
Date: Wed, 11 Mar 2026 14:38:52 +0100
Subject: [PATCH 1/4] Fix ADF15 parser

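Update the regex patterns so the parser also handles raw ADF15 files
whose comment headers are formatted differently
(e.g. pec40#w_ic#w0.dat):

- accept "WVLEN(A)" as well as "WAVELENGTH" in the PEC index header
- accept an optional "lv" column and "cm^-1" in the configuration header
- make the dot after the transition index optional
- upper-case the rate type before comparing it with EXCIT/RECOM
- add entries 14-20 ('T'-'Z') to the letter lookup table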
---
 cherab/openadas/parse/adf15.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py
index 12aa01a9..a1b60219 100644
--- a/cherab/openadas/parse/adf15.py
+++ b/cherab/openadas/parse/adf15.py
@@ -38,6 +38,13 @@
     11: 'O',
     12: 'Q',
     13: 'R',
+    14: 'T',
+    15: 'U',
+    16: 'V',
+    17: 'W',
+    18: 'X',
+    19: 'Y',
+    20: 'Z',
 }
 
 
@@ -120,7 +127,7 @@ def _scrape_metadata_hydrogen(file, element, charge):
         wavelength = float(match.groups()[1]) / 10  # convert Angstroms to nm
         upper_level = int(match.groups()[2])
         lower_level = int(match.groups()[3])
-        rate_type_adas = match.groups()[4]
+        rate_type_adas = match.groups()[4].upper()
         if rate_type_adas == 'EXCIT':
             rate_type = 'excitation'
         elif rate_type_adas == 'RECOM':
@@ -147,14 +154,14 @@ def _scrape_metadata_hydrogen_like(file, element, charge):
     file.seek(0)
     lines = file.readlines()
 
-    pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE'
+    pec_index_header_match = r'^C\s*ISEL\s*(?:WAVELENGTH|WVLEN\(A\))\s*TRANSITION\s*TYPE'
     while not re.match(pec_index_header_match, lines[0], re.IGNORECASE):
         lines.pop(0)
     index_lines = lines
 
     for i in range(len(index_lines)):
 
-        pec_full_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
+        pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
         match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE)
         if not match:
             continue
@@ -163,7 +170,7 @@ def _scrape_metadata_hydrogen_like(file, element, charge):
         wavelength = float(match.groups()[1]) / 10  # convert Angstroms to nm
         upper_level = int(match.groups()[2])
         lower_level = int(match.groups()[3])
-        rate_type_adas = match.groups()[4]
+        rate_type_adas = match.groups()[4].upper()
         if rate_type_adas == 'EXCIT':
             rate_type = 'excitation'
         elif rate_type_adas == 'RECOM':
@@ -193,10 +200,10 @@ def _scrape_metadata_full(file, element, charge):
     configuration_lines = []
     configuration_dict = {}
 
-    configuration_header_match = r'^C\s*Configuration\s*\(2S\+1\)L\(w-1/2\)\s*Energy \(cm\*\*-1\)$'
+    configuration_header_match = r'^C\s*(?:lv\s+)?Configuration\s*\(2S\+1\)L\(w-1/2\)\s*Energy\s*\(cm(?:\*\*|\^)-1\)\s*$'
     while not re.match(configuration_header_match, lines[0], re.IGNORECASE):
         lines.pop(0)
-    pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE'
+    pec_index_header_match = r'^C\s*ISEL\s*(?:WAVELENGTH|WVLEN\(A\))\s*TRANSITION\s*TYPE'
     while not re.match(pec_index_header_match, lines[0], re.IGNORECASE):
         configuration_lines.append(lines[0])
         lines.pop(0)
@@ -204,7 +211,7 @@ def _scrape_metadata_full(file, element, charge):
 
     for i in range(len(configuration_lines)):
 
-        configuration_string_match = r"^C\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)"
+        configuration_string_match = r'^[Cc]\s*([0-9]+)\s+(\S+)\s+\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+(?:\.[0-9]+)?)?\s*$'
         match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE)
         if not match:
             continue
@@ -231,7 +238,7 @@ def _scrape_metadata_full(file, element, charge):
         upper_level = configuration_dict[upper_level_id]
         lower_level_id = int(match.groups()[3])
         lower_level = configuration_dict[lower_level_id]
-        rate_type_adas = match.groups()[4]
+        rate_type_adas = match.groups()[4].upper()
         if rate_type_adas == 'EXCIT':
             rate_type = 'excitation'
         elif rate_type_adas == 'RECOM':

From b2cdca5e0e5fb79cf2ff81d32560b3949aca863c Mon Sep 17 00:00:00 2001
From: munechika-koyo <munechika.koyo@gmail.com>
Date: Wed, 11 Mar 2026 14:52:57 +0100
Subject: [PATCH 2/4] Fix regressions for config regex pattern in
 _scrape_metadata_full

---
 cherab/openadas/parse/adf15.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py
index a1b60219..12067a28 100644
--- a/cherab/openadas/parse/adf15.py
+++ b/cherab/openadas/parse/adf15.py
@@ -211,7 +211,11 @@ def _scrape_metadata_full(file, element, charge):
 
     for i in range(len(configuration_lines)):
 
-        configuration_string_match = r'^[Cc]\s*([0-9]+)\s+(\S+)\s+\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+(?:\.[0-9]+)?)?\s*$'
+        configuration_string_match = (
+            r'^[Cc]\s*([0-9]+)\s+([0-9A-Za-z#]+(?:\s+[0-9A-Za-z#]+)*)\s+'
+            r'\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)'
+            r'\s*(?:[0-9]+(?:\.[0-9]+)?)?\s*$'
+        )
         match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE)
         if not match:
             continue

From 6157a17462786aa060ebac69af1ece9e98423125 Mon Sep 17 00:00:00 2001
From: munechika-koyo <munechika.koyo@gmail.com>
Date: Sun, 22 Mar 2026 00:48:19 +0100
Subject: [PATCH 3/4] Move regex pattern definition out of for-loop

---
 cherab/openadas/parse/adf15.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py
index 12067a28..2a864592 100644
--- a/cherab/openadas/parse/adf15.py
+++ b/cherab/openadas/parse/adf15.py
@@ -116,9 +116,8 @@ def _scrape_metadata_hydrogen(file, element, charge):
         lines.pop(0)
     index_lines = lines
 
+    pec_hydrogen_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*N=\s*([0-9]*) - N=\s*([0-9]*)\s*([A-Z]*)'
     for i in range(len(index_lines)):
-
-        pec_hydrogen_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*N=\s*([0-9]*) - N=\s*([0-9]*)\s*([A-Z]*)'
         match = re.match(pec_hydrogen_transition_match, index_lines[i], re.IGNORECASE)
         if not match:
             continue
@@ -159,9 +158,8 @@ def _scrape_metadata_hydrogen_like(file, element, charge):
         lines.pop(0)
     index_lines = lines
 
+    pec_full_transition_match = r'^[cC]\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
     for i in range(len(index_lines)):
-
-        pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
         match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE)
         if not match:
             continue
@@ -209,13 +207,8 @@ def _scrape_metadata_full(file, element, charge):
         lines.pop(0)
     index_lines = lines
 
+    configuration_string_match = r'^[cC]\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)'
     for i in range(len(configuration_lines)):
-
-        configuration_string_match = (
-            r'^[Cc]\s*([0-9]+)\s+([0-9A-Za-z#]+(?:\s+[0-9A-Za-z#]+)*)\s+'
-            r'\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)'
-            r'\s*(?:[0-9]+(?:\.[0-9]+)?)?\s*$'
-        )
         match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE)
         if not match:
             continue
@@ -229,9 +222,8 @@ def _scrape_metadata_full(file, element, charge):
         configuration_dict[config_id] = (electron_configuration + " " + spin_multiplicity +
                                          total_orbital_quantum_number + total_angular_momentum_quantum_number)
 
+    pec_full_transition_match = r'^[cC]\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
     for i in range(len(index_lines)):
-
-        pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)'
         match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE)
         if not match:
             continue

From 04ddbd77c5e8d52429d9198a9a42a220f7e97af9 Mon Sep 17 00:00:00 2001
From: munechika-koyo <munechika.koyo@gmail.com>
Date: Sun, 22 Mar 2026 01:43:45 +0100
Subject: [PATCH 4/4] Refine regex pattern for configuration string in
 _scrape_metadata_full

---
 cherab/openadas/parse/adf15.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py
index 2a864592..6ecd424c 100644
--- a/cherab/openadas/parse/adf15.py
+++ b/cherab/openadas/parse/adf15.py
@@ -207,7 +207,13 @@ def _scrape_metadata_full(file, element, charge):
         lines.pop(0)
     index_lines = lines
 
-    configuration_string_match = r'^[cC]\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)'
+    configuration_string_match = (
+        r'^[cC]\s*([0-9]+)\s*'
+        r'((?:[0-9][SPDFG][0-9](?:\s+[0-9][SPDFG][0-9])*)|(?:[0-9A-Z]+))\s*'
+        r'\(([0-9]*\.?[0-9]+)\)'
+        r'\s*([0-9]+)'
+        r'\(\s*([0-9]*\.?[0-9]+)\)'
+    )
     for i in range(len(configuration_lines)):
         match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE)
         if not match: