From 234ec1524df2b95bd635c40554ca6eb87c5d602b Mon Sep 17 00:00:00 2001 From: munechika-koyo Date: Wed, 11 Mar 2026 14:38:52 +0100 Subject: [PATCH 1/4] Fix ADF15 parser Update regex patterns to handle other raw files (e.g. pec40#w_ic#w0.dat) --- cherab/openadas/parse/adf15.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py index 12aa01a9..a1b60219 100644 --- a/cherab/openadas/parse/adf15.py +++ b/cherab/openadas/parse/adf15.py @@ -38,6 +38,13 @@ 11: 'O', 12: 'Q', 13: 'R', + 14: 'T', + 15: 'U', + 16: 'V', + 17: 'W', + 18: 'X', + 19: 'Y', + 20: 'Z', } @@ -120,7 +127,7 @@ def _scrape_metadata_hydrogen(file, element, charge): wavelength = float(match.groups()[1]) / 10 # convert Angstroms to nm upper_level = int(match.groups()[2]) lower_level = int(match.groups()[3]) - rate_type_adas = match.groups()[4] + rate_type_adas = match.groups()[4].upper() if rate_type_adas == 'EXCIT': rate_type = 'excitation' elif rate_type_adas == 'RECOM': @@ -147,14 +154,14 @@ def _scrape_metadata_hydrogen_like(file, element, charge): file.seek(0) lines = file.readlines() - pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE' + pec_index_header_match = r'^C\s*ISEL\s*(?:WAVELENGTH|WVLEN\(A\))\s*TRANSITION\s*TYPE' while not re.match(pec_index_header_match, lines[0], re.IGNORECASE): lines.pop(0) index_lines = lines for i in range(len(index_lines)): - pec_full_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)' + pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)' match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE) if not match: continue @@ -163,7 +170,7 @@ def _scrape_metadata_hydrogen_like(file, element, charge): wavelength = float(match.groups()[1]) / 10 # convert Angstroms to nm upper_level = int(match.groups()[2]) lower_level = int(match.groups()[3]) - rate_type_adas = match.groups()[4] + rate_type_adas = match.groups()[4].upper() if rate_type_adas == 'EXCIT': rate_type = 'excitation' elif rate_type_adas == 'RECOM': @@ -193,10 +200,10 @@ def _scrape_metadata_full(file, element, charge): configuration_lines = [] configuration_dict = {} - configuration_header_match = r'^C\s*Configuration\s*\(2S\+1\)L\(w-1/2\)\s*Energy \(cm\*\*-1\)$' + configuration_header_match = r'^C\s*(?:lv\s+)?Configuration\s*\(2S\+1\)L\(w-1/2\)\s*Energy\s*\(cm(?:\*\*|\^)-1\)\s*$' while not re.match(configuration_header_match, lines[0], re.IGNORECASE): lines.pop(0) - pec_index_header_match = r'^C\s*ISEL\s*WAVELENGTH\s*TRANSITION\s*TYPE' + pec_index_header_match = r'^C\s*ISEL\s*(?:WAVELENGTH|WVLEN\(A\))\s*TRANSITION\s*TYPE' while not re.match(pec_index_header_match, lines[0], re.IGNORECASE): configuration_lines.append(lines[0]) lines.pop(0) @@ -204,7 +211,7 @@ def _scrape_metadata_full(file, element, charge): for i in range(len(configuration_lines)): - configuration_string_match = r"^C\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)" + configuration_string_match = r'^[Cc]\s*([0-9]+)\s+(\S+)\s+\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+(?:\.[0-9]+)?)?\s*$' match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE) if not match: continue @@ -231,7 +238,7 @@ def _scrape_metadata_full(file, element, charge): upper_level = configuration_dict[upper_level_id] lower_level_id = int(match.groups()[3]) lower_level = configuration_dict[lower_level_id] - rate_type_adas = match.groups()[4] + rate_type_adas = match.groups()[4].upper() if rate_type_adas == 'EXCIT': rate_type = 'excitation' elif rate_type_adas == 'RECOM': From b2cdca5e0e5fb79cf2ff81d32560b3949aca863c Mon Sep 17 00:00:00 2001 From: munechika-koyo Date: Wed, 11 Mar 2026 14:52:57 +0100 Subject: [PATCH 2/4] Fix regressions for config regex pattern in _scrape_metadata_full --- cherab/openadas/parse/adf15.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py index a1b60219..12067a28 100644 --- a/cherab/openadas/parse/adf15.py +++ b/cherab/openadas/parse/adf15.py @@ -211,7 +211,11 @@ def _scrape_metadata_full(file, element, charge): for i in range(len(configuration_lines)): - configuration_string_match = r'^[Cc]\s*([0-9]+)\s+(\S+)\s+\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+(?:\.[0-9]+)?)?\s*$' + configuration_string_match = ( + r'^[Cc]\s*([0-9]+)\s+([0-9A-Za-z#]+(?:\s+[0-9A-Za-z#]+)*)\s+' + r'\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)' + r'\s*(?:[0-9]+(?:\.[0-9]+)?)?\s*$' + ) match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE) if not match: continue From 6157a17462786aa060ebac69af1ece9e98423125 Mon Sep 17 00:00:00 2001 From: munechika-koyo Date: Sun, 22 Mar 2026 00:48:19 +0100 Subject: [PATCH 3/4] Move regex pattern definition out of for-loop --- cherab/openadas/parse/adf15.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py index 12067a28..2a864592 100644 --- a/cherab/openadas/parse/adf15.py +++ b/cherab/openadas/parse/adf15.py @@ -116,9 +116,8 @@ def _scrape_metadata_hydrogen(file, element, charge): lines.pop(0) index_lines = lines + pec_hydrogen_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*N=\s*([0-9]*) - N=\s*([0-9]*)\s*([A-Z]*)' for i in range(len(index_lines)): - - pec_hydrogen_transition_match = r'^C\s*([0-9]*)\.\s*([0-9]*\.[0-9]*)\s*N=\s*([0-9]*) - N=\s*([0-9]*)\s*([A-Z]*)' match = re.match(pec_hydrogen_transition_match, index_lines[i], re.IGNORECASE) if not match: continue @@ -159,9 +158,8 @@ def _scrape_metadata_hydrogen_like(file, element, charge): lines.pop(0) index_lines = lines + pec_full_transition_match = r'^[cC]\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)' for i in range(len(index_lines)): - - pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)' match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE) if not match: continue @@ -209,13 +207,8 @@ def _scrape_metadata_full(file, element, charge): lines.pop(0) index_lines = lines + configuration_string_match = r'^[cC]\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)' for i in range(len(configuration_lines)): - - configuration_string_match = ( - r'^[Cc]\s*([0-9]+)\s+([0-9A-Za-z#]+(?:\s+[0-9A-Za-z#]+)*)\s+' - r'\(([0-9]+(?:\.[0-9]+)?)\)\s*([0-9]+)\(\s*([0-9]+(?:\.[0-9]+)?)\)' - r'\s*(?:[0-9]+(?:\.[0-9]+)?)?\s*$' - ) match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE) if not match: continue @@ -229,9 +222,8 @@ def _scrape_metadata_full(file, element, charge): configuration_dict[config_id] = (electron_configuration + " " + spin_multiplicity + total_orbital_quantum_number + total_angular_momentum_quantum_number) + pec_full_transition_match = r'^[cC]\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)' for i in range(len(index_lines)): - - pec_full_transition_match = r'^C\s*([0-9]*)\.?\s*([0-9]*\.[0-9]*)\s*([0-9]*)[\(\)\.0-9\s]*-\s*([0-9]*)[\(\)\.0-9\s]*([A-Z]*)' match = re.match(pec_full_transition_match, index_lines[i], re.IGNORECASE) if not match: continue From 04ddbd77c5e8d52429d9198a9a42a220f7e97af9 Mon Sep 17 00:00:00 2001 From: munechika-koyo Date: Sun, 22 Mar 2026 01:43:45 +0100 Subject: [PATCH 4/4] Refine regex pattern for configuration string in _scrape_metadata_full --- cherab/openadas/parse/adf15.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cherab/openadas/parse/adf15.py b/cherab/openadas/parse/adf15.py index 2a864592..6ecd424c 100644 --- a/cherab/openadas/parse/adf15.py +++ b/cherab/openadas/parse/adf15.py @@ -207,7 +207,13 @@ def _scrape_metadata_full(file, element, charge): lines.pop(0) index_lines = lines - configuration_string_match = r'^[cC]\s*([0-9]*)\s*((?:[0-9][SPDFG][0-9]\s)*)\s*\(([0-9]*\.?[0-9]*)\)([0-9]*)\(\s*([0-9]*\.?[0-9]*)\)' + configuration_string_match = ( + r'^[cC]\s*([0-9]+)\s*' + r'((?:[0-9][SPDFG][0-9](?:\s+[0-9][SPDFG][0-9])*)|(?:[0-9A-Z]+))\s*' + r'\(([0-9]*\.?[0-9]+)\)' + r'\s*([0-9]+)' + r'\(\s*([0-9]*\.?[0-9]+)\)' + ) for i in range(len(configuration_lines)): match = re.match(configuration_string_match, configuration_lines[i], re.IGNORECASE) if not match: