Skip to content

Commit 69e9432

Browse files
committed
feat: refine scan_rust_files function to target specific file patterns and enhance type validation
Updated the scan_rust_files function in generate-common.py to scan only camt_*.rs and pacs_*.rs files, improving efficiency and clarity. Reworked the type-matching regex so that each struct or enum definition is captured together with its validate() impl block, and refined the summary output so that it reports types appearing in multiple files while excluding root message structs. Additionally, updated the description and help text of the main function's argument parser to reflect the new file filtering criteria.
1 parent 7304a7d commit 69e9432

14 files changed

Lines changed: 11404 additions & 46426 deletions

examples/pacs008_example.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
// This example demonstrates how to create, serialize, deserialize, and validate
33
// a pacs.008 (FI to FI Customer Credit Transfer) message
44

5+
use mx_message::common::*;
56
use mx_message::document::Document;
67
use mx_message::pacs_008_001_08::*;
7-
use serde_json;
88
use std::error::Error;
99

1010
fn main() -> Result<(), Box<dyn Error>> {

examples/xml_serialization.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// Plasmatic MX Message Parsing Library - XML Serialization Example
22
// This example demonstrates XML serialization and deserialization of pacs.008 messages
33

4+
use mx_message::common::*;
45
use mx_message::document::Document;
56
use mx_message::pacs_008_001_08::*;
67
use quick_xml::de::from_str as xml_from_str;
78
use quick_xml::se::to_string as xml_to_string;
8-
use serde_json;
99
use std::error::Error;
1010

1111
fn main() -> Result<(), Box<dyn Error>> {

generate-common.py

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class StructMatch:
1515

1616
def scan_rust_files(directory: str) -> tuple[defaultdict, dict, list]:
1717
"""
18-
Scan .rs files for struct and enum definitions and their usage in type definitions.
18+
Scan camt_*.rs and pacs_*.rs files for struct and enum definitions and their usage in type definitions.
1919
Returns uppercase types for common.rs and lowercase types for removal.
2020
"""
2121
type_locations = defaultdict(list)
@@ -24,19 +24,26 @@ def scan_rust_files(directory: str) -> tuple[defaultdict, dict, list]:
2424

2525
# Compile regex patterns once
2626
type_pattern = re.compile(
27-
r'(\n\n// (\w+) \.\.\.\n'
27+
r'(// (\w+) \.\.\.\n'
2828
r'#\[derive\(Debug, Default, Serialize, Deserialize, Clone, PartialEq\)\]\n'
29-
r'(?:pub struct|pub enum)\s+\w+.*?\t\tOk\(\(\)\)\n\t\}\n\}\n)',
29+
r'(?:pub struct|pub enum)\s+\w+.*?'
30+
r'impl \w+ \{\s*pub fn validate\(&self\) -> Result<\(\), ValidationError> \{.*?\n\s*\}\s*\n\})',
3031
re.DOTALL
3132
)
3233

33-
# More efficient base pattern for type usage
34-
base_usage_pattern = r'(?m)^[^/]*?:\s*(?:{}|Option<{}>|Vec<{}>|Option<Vec<{}>>)\s*,'
35-
base_usage_pattern = r'(?m)^[^/]*?:\s*([\w\d]+|Option<[\w\d]+>|Vec<[\w\d]+>|Option<Vec<[\w\d]+>>)\s*,'
36-
type_usage_patterns = re.compile(base_usage_pattern)
34+
# Pattern for type usage in field definitions
35+
type_usage_patterns = re.compile(r'pub \w+: ([\w\d]+|Option<[\w\d]+>|Vec<[\w\d]+>|Option<Vec<[\w\d]+>>)\s*,')
3736

3837
dir_path = Path(directory).resolve()
39-
rust_files = [f for f in os.listdir(dir_path) if f.endswith('.rs')]
38+
# Filter for only camt_*.rs and pacs_*.rs files
39+
rust_files = [f for f in os.listdir(dir_path)
40+
if f.endswith('.rs') and (f.startswith('camt_') or f.startswith('pacs_'))]
41+
42+
if not rust_files:
43+
print(f"No camt_*.rs or pacs_*.rs files found in {directory}")
44+
return defaultdict(list), {}, []
45+
46+
print(f"Found {len(rust_files)} camt_/pacs_ files: {', '.join(sorted(rust_files))}")
4047

4148
# Process each file only once
4249
for filename in rust_files:
@@ -50,7 +57,8 @@ def scan_rust_files(directory: str) -> tuple[defaultdict, dict, list]:
5057
type_matches = list(type_pattern.finditer(content))
5158

5259
def extract_type(type_str):
53-
base_type = re.search(r'(\w+)(?:>)*,?$', type_str)
60+
# Extract base type from Option<Type>, Vec<Type>, Option<Vec<Type>>, or just Type
61+
base_type = re.search(r'(?:Option<(?:Vec<)?)?(\w+)(?:>)?>?', type_str)
5462
return base_type.group(1) if base_type else None
5563

5664
type_usages = []
@@ -143,40 +151,62 @@ def print_summary(type_locations: defaultdict, lowercase_matches: list, typecoun
143151
print("-" * 40)
144152
by_file = defaultdict(list)
145153
for match in lowercase_matches:
146-
by_file[match.filename].append(re.search(r'// (\w+) \.\.\.', match.content).group(1))
154+
type_match = re.search(r'// (\w+) \.\.\.\n', match.content)
155+
if type_match:
156+
by_file[match.filename].append(type_match.group(1))
147157

148158
for filename, types in sorted(by_file.items()):
149159
print(f"{filename}:")
150160
for type_name in sorted(types):
151161
print(f" - {type_name}")
152162
print()
153163

154-
# Print uppercase types summary
164+
# Identify root structs that should stay in their original files
165+
root_structs = set()
166+
for filename in set(match.filename for matches in type_locations.values() for match in matches):
167+
# Extract the message type from filename (e.g., camt_057_001_06.rs -> camt.057.001.06)
168+
if filename.startswith(('camt_', 'pacs_')):
169+
# Look for structs that match the file pattern (these are usually the root message structs)
170+
file_base = filename.replace('.rs', '').replace('_', '').upper()
171+
for type_name in type_locations.keys():
172+
# Root structs often contain the file identifier or are main message types
173+
type_upper = type_name.upper()
174+
if (file_base[:4] in type_upper or # e.g., CAMT in CamtXXXXXXVXX
175+
type_name.endswith(('V01', 'V02', 'V03', 'V04', 'V05', 'V06', 'V07', 'V08', 'V09')) or
176+
'PROPRIETARYMESSAGE' in type_upper or
177+
'DOCUMENT' in type_upper):
178+
# Check if this type is only in one file (root structs shouldn't be duplicated)
179+
if len(type_locations[type_name]) == 1 and type_locations[type_name][0].filename == filename:
180+
root_structs.add(type_name)
181+
182+
print(f"\nIdentified root structs to keep in original files: {', '.join(sorted(root_structs))}")
183+
184+
# Print uppercase types summary - focus on types that appear in multiple files, excluding root structs
155185
frequent_types = {
156186
name: matches
157187
for name, matches in type_locations.items()
158-
if sum(match.usage_count for match in matches) >= typecount
188+
if name not in root_structs and len(matches) > 1 # Only move types that appear in multiple files
159189
}
160190

161191
if not frequent_types:
162-
print(f"No uppercase types found with usage count >= {typecount}.")
192+
print(f"No uppercase types found that appear in multiple files (excluding root structs).")
163193
return
164194

165-
print(f"\nUppercase types with usage count >= {typecount}:")
166-
print("-" * 40)
195+
print(f"\nUppercase types that appear in multiple files (excluding root structs):")
196+
print("-" * 70)
167197

168198
# Pre-calculate usage counts
169199
usage_data = [
170200
(type_name, matches, sum(m.usage_count for m in matches))
171201
for type_name, matches in frequent_types.items()
172202
]
173203

174-
# Sort by total usage count
204+
# Sort by number of files first, then by total usage count
175205
for type_name, matches, total_usage in sorted(
176-
usage_data, key=lambda x: x[2], reverse=True
206+
usage_data, key=lambda x: (len(x[1]), x[2]), reverse=True
177207
):
178208
files = [match.filename for match in matches]
179-
print(f"{type_name}: used {total_usage} times across {len(files)} files")
209+
print(f"{type_name}: appears in {len(files)} files, used {total_usage} times total")
180210
for match in matches:
181211
print(f" - {match.filename}: {match.usage_count} uses")
182212
print()
@@ -197,10 +227,10 @@ def read_existing_common(output_file: str) -> tuple[set, str]:
197227

198228
def main():
199229
parser = argparse.ArgumentParser(
200-
description='Find frequently used structs and move to common.rs'
230+
description='Find frequently used structs in camt_*.rs and pacs_*.rs files and move to common.rs'
201231
)
202232
parser.add_argument('directory',
203-
help='Directory containing .rs files (default: current directory)',
233+
help='Directory containing camt_*.rs and pacs_*.rs files (default: current directory)',
204234
default='.',
205235
nargs='?')
206236

0 commit comments

Comments
 (0)