@@ -15,7 +15,7 @@ class StructMatch:
1515
1616def scan_rust_files (directory : str ) -> tuple [defaultdict , dict , list ]:
1717 """
18- Scan .rs files for struct and enum definitions and their usage in type definitions.
18+ Scan camt_*.rs and pacs_*.rs files for struct and enum definitions and their usage in type definitions.
1919 Returns uppercase types for common.rs and lowercase types for removal.
2020 """
2121 type_locations = defaultdict (list )
@@ -24,19 +24,26 @@ def scan_rust_files(directory: str) -> tuple[defaultdict, dict, list]:
2424
2525 # Compile regex patterns once
2626 type_pattern = re .compile (
27- r'(\n\n // (\w+) \.\.\.\n'
27+ r'(// (\w+) \.\.\.\n'
2828 r'#\[derive\(Debug, Default, Serialize, Deserialize, Clone, PartialEq\)\]\n'
29- r'(?:pub struct|pub enum)\s+\w+.*?\t\tOk\(\(\)\)\n\t\}\n\}\n)' ,
29+ r'(?:pub struct|pub enum)\s+\w+.*?'
30+ r'impl \w+ \{\s*pub fn validate\(&self\) -> Result<\(\), ValidationError> \{.*?\n\s*\}\s*\n\})' ,
3031 re .DOTALL
3132 )
3233
33- # More efficient base pattern for type usage
34- base_usage_pattern = r'(?m)^[^/]*?:\s*(?:{}|Option<{}>|Vec<{}>|Option<Vec<{}>>)\s*,'
35- base_usage_pattern = r'(?m)^[^/]*?:\s*([\w\d]+|Option<[\w\d]+>|Vec<[\w\d]+>|Option<Vec<[\w\d]+>>)\s*,'
36- type_usage_patterns = re .compile (base_usage_pattern )
34+ # Pattern for type usage in field definitions
35+ type_usage_patterns = re .compile (r'pub \w+: ([\w\d]+|Option<[\w\d]+>|Vec<[\w\d]+>|Option<Vec<[\w\d]+>>)\s*,' )
3736
3837 dir_path = Path (directory ).resolve ()
39- rust_files = [f for f in os .listdir (dir_path ) if f .endswith ('.rs' )]
38+ # Filter for only camt_*.rs and pacs_*.rs files
39+ rust_files = [f for f in os .listdir (dir_path )
40+ if f .endswith ('.rs' ) and (f .startswith ('camt_' ) or f .startswith ('pacs_' ))]
41+
42+ if not rust_files :
43+ print (f"No camt_*.rs or pacs_*.rs files found in { directory } " )
44+ return defaultdict (list ), {}, []
45+
46+ print (f"Found { len (rust_files )} camt_/pacs_ files: { ', ' .join (sorted (rust_files ))} " )
4047
4148 # Process each file only once
4249 for filename in rust_files :
@@ -50,7 +57,8 @@ def scan_rust_files(directory: str) -> tuple[defaultdict, dict, list]:
5057 type_matches = list (type_pattern .finditer (content ))
5158
5259 def extract_type (type_str ):
53- base_type = re .search (r'(\w+)(?:>)*,?$' , type_str )
60+ # Extract base type from Option<Type>, Vec<Type>, Option<Vec<Type>>, or just Type
61+ base_type = re .search (r'(?:Option<(?:Vec<)?)?(\w+)(?:>)?>?' , type_str )
5462 return base_type .group (1 ) if base_type else None
5563
5664 type_usages = []
@@ -143,40 +151,62 @@ def print_summary(type_locations: defaultdict, lowercase_matches: list, typecoun
143151 print ("-" * 40 )
144152 by_file = defaultdict (list )
145153 for match in lowercase_matches :
146- by_file [match .filename ].append (re .search (r'// (\w+) \.\.\.' , match .content ).group (1 ))
154+ type_match = re .search (r'// (\w+) \.\.\.\n' , match .content )
155+ if type_match :
156+ by_file [match .filename ].append (type_match .group (1 ))
147157
148158 for filename , types in sorted (by_file .items ()):
149159 print (f"{ filename } :" )
150160 for type_name in sorted (types ):
151161 print (f" - { type_name } " )
152162 print ()
153163
154- # Print uppercase types summary
164+ # Identify root structs that should stay in their original files
165+ root_structs = set ()
166+ for filename in set (match .filename for matches in type_locations .values () for match in matches ):
167+ # Build an uppercase identifier from the filename (e.g., camt_057_001_06.rs -> CAMT05700106)
168+ if filename .startswith (('camt_' , 'pacs_' )):
169+ # Look for structs that match the file pattern (these are usually the root message structs)
170+ file_base = filename .replace ('.rs' , '' ).replace ('_' , '' ).upper ()
171+ for type_name in type_locations .keys ():
172+ # Root structs often contain the file identifier or are main message types
173+ type_upper = type_name .upper ()
174+ if (file_base [:4 ] in type_upper or # e.g., CAMT in CamtXXXXXXVXX
175+ type_name .endswith (('V01' , 'V02' , 'V03' , 'V04' , 'V05' , 'V06' , 'V07' , 'V08' , 'V09' )) or
176+ 'PROPRIETARYMESSAGE' in type_upper or
177+ 'DOCUMENT' in type_upper ):
178+ # Check if this type is only in one file (root structs shouldn't be duplicated)
179+ if len (type_locations [type_name ]) == 1 and type_locations [type_name ][0 ].filename == filename :
180+ root_structs .add (type_name )
181+
182+ print (f"\n Identified root structs to keep in original files: { ', ' .join (sorted (root_structs ))} " )
183+
184+ # Print uppercase types summary - focus on types that appear in multiple files, excluding root structs
155185 frequent_types = {
156186 name : matches
157187 for name , matches in type_locations .items ()
158- if sum ( match . usage_count for match in matches ) >= typecount
188+ if name not in root_structs and len ( matches ) > 1 # Only move types that appear in multiple files
159189 }
160190
161191 if not frequent_types :
162- print (f"No uppercase types found with usage count >= { typecount } ." )
192+ print (f"No uppercase types found that appear in multiple files (excluding root structs) ." )
163193 return
164194
165- print (f"\n Uppercase types with usage count >= { typecount } :" )
166- print ("-" * 40 )
195+ print (f"\n Uppercase types that appear in multiple files (excluding root structs) :" )
196+ print ("-" * 70 )
167197
168198 # Pre-calculate usage counts
169199 usage_data = [
170200 (type_name , matches , sum (m .usage_count for m in matches ))
171201 for type_name , matches in frequent_types .items ()
172202 ]
173203
174- # Sort by total usage count
204+ # Sort by number of files first, then by total usage count
175205 for type_name , matches , total_usage in sorted (
176- usage_data , key = lambda x : x [ 2 ] , reverse = True
206+ usage_data , key = lambda x : ( len ( x [ 1 ]), x [ 2 ]) , reverse = True
177207 ):
178208 files = [match .filename for match in matches ]
179- print (f"{ type_name } : used { total_usage } times across { len (files )} files" )
209+ print (f"{ type_name } : appears in { len (files )} files, used { total_usage } times total " )
180210 for match in matches :
181211 print (f" - { match .filename } : { match .usage_count } uses" )
182212 print ()
@@ -197,10 +227,10 @@ def read_existing_common(output_file: str) -> tuple[set, str]:
197227
198228def main ():
199229 parser = argparse .ArgumentParser (
200- description = 'Find frequently used structs and move to common.rs'
230+ description = 'Find frequently used structs in camt_*.rs and pacs_*.rs files and move to common.rs'
201231 )
202232 parser .add_argument ('directory' ,
203- help = 'Directory containing .rs files (default: current directory)' ,
233+ help = 'Directory containing camt_*.rs and pacs_* .rs files (default: current directory)' ,
204234 default = '.' ,
205235 nargs = '?' )
206236
0 commit comments