@@ -70,6 +70,38 @@ def parse_roc_ym(raw: str) -> tuple[str, int, int, str] | None:
7070 return digits , ad_year , month , month_key
7171
7272
73+ def parse_any_ym (raw : str ) -> tuple [str , int , int , str ] | None :
74+ s = (raw or "" ).strip ()
75+ if not s :
76+ return None
77+
78+ if "/" in s :
79+ parts = s .split ("/" )
80+ if len (parts ) == 2 and parts [0 ].isdigit () and parts [1 ].isdigit ():
81+ y = int (parts [0 ])
82+ m = int (parts [1 ])
83+ if y >= 1900 and 1 <= m <= 12 :
84+ token = f"{ y :04d} { m :02d} "
85+ return token , y , m , f"{ y :04d} -{ m :02d} "
86+
87+ digits = "" .join (ch for ch in s if ch .isdigit ())
88+ if not digits :
89+ return None
90+
91+ if len (digits ) == 6 :
92+ y = int (digits [:4 ])
93+ m = int (digits [- 2 :])
94+ if y >= 1900 and 1 <= m <= 12 :
95+ return digits , y , m , f"{ y :04d} -{ m :02d} "
96+
97+ if len (digits ) == 4 :
98+ y = int (digits )
99+ if y >= 1900 :
100+ return digits , y , 1 , f"{ y :04d} -01"
101+
102+ return parse_roc_ym (raw )
103+
104+
73105def parse_value (raw : str ) -> float | None :
74106 val = raw .strip ().replace ("," , "" )
75107 if not val :
@@ -115,6 +147,50 @@ def read_facts(data_dir: Path) -> list[FactRow]:
115147 return rows
116148
117149
def read_openapi_facts(openapi_dir: Path) -> list[FactRow]:
    """Flatten every ``*.csv`` in *openapi_dir* into one fact row per cell.

    Files are visited in sorted path order and read as UTF-8 (a leading BOM
    is tolerated). Each column other than ``年月``, ``公告日期`` and
    ``TRANS_DATE`` is treated as a metric. For every record whose ``年月``
    value parses via ``parse_any_ym``, each non-blank metric cell becomes a
    ``FactRow`` with a fixed institution of ``總計`` and institution type
    ``OpenAPI``.

    Args:
        openapi_dir: Directory holding the OpenAPI CSV exports.

    Returns:
        The collected rows; an empty list when the directory does not exist.
        Files without a header row and records without a parsable period
        contribute nothing.
    """
    facts: list[FactRow] = []
    if not openapi_dir.exists():
        return facts

    # Columns that describe the record rather than carry a measurement.
    non_metric = {"年月", "公告日期", "TRANS_DATE"}

    for path in sorted(openapi_dir.glob("*.csv")):
        dataset_name = f"OPENAPI_{path.stem}"
        with path.open("r", encoding="utf-8-sig", newline="") as handle:
            reader = csv.DictReader(handle)
            header = reader.fieldnames
            if header is None:
                # Empty file: no header row, hence nothing to read.
                continue

            metrics = [name for name in header if name not in non_metric]
            for record in reader:
                period = parse_any_ym(record.get("年月", ""))
                if period is None:
                    continue
                token, ad_year, month, month_key = period

                # Pair each metric with its trimmed cell text; blank cells
                # are filtered out below.
                cells = (
                    (metric, (record.get(metric) or "").strip())
                    for metric in metrics
                )
                facts.extend(
                    FactRow(
                        dataset=dataset_name,
                        roc_ym=token,
                        ad_year=ad_year,
                        month=month,
                        month_key=month_key,
                        institution="總計",
                        institution_type="OpenAPI",
                        item_zh=metric,
                        item_en=metric,
                        value_raw=text,
                        value_num=parse_value(text),
                        source_file=str(path),
                    )
                    for metric, text in cells
                    if text != ""
                )
    return facts
192+
193+
118194def create_sqlite (sqlite_path : Path , rows : Iterable [FactRow ]) -> None :
119195 sqlite_path .parent .mkdir (parents = True , exist_ok = True )
120196 conn = sqlite3 .connect (sqlite_path )
@@ -394,8 +470,10 @@ def main() -> None:
394470 args = parse_args ()
395471 base_path = Path (args .base_path ).resolve ()
396472 data_dir = base_path / "data"
473+ openapi_dir = base_path / "rawdata" / "openapi"
397474
398475 rows = read_facts (data_dir )
476+ rows .extend (read_openapi_facts (openapi_dir ))
399477 if not rows :
400478 raise SystemExit (f"No CSV rows found under { data_dir } " )
401479
0 commit comments