-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathcommon_utils.py
More file actions
30 lines (26 loc) · 1.22 KB
/
common_utils.py
File metadata and controls
30 lines (26 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import json
import re
from typing import Any, Dict, List
# Captures the raw JSON token following a "row_id" key: either a quoted string
# (escape-aware) or a bare scalar running up to the next delimiter.
_ROW_ID_PATTERN = re.compile(r'"row_id"\s*:\s*("(?:[^"\\]|\\.)*"|[^\s,}\]]+)')


def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
    """
    Read a JSONL file (one JSON value per line) and return the parsed values in file order.

    Blank and whitespace-only lines are skipped, so a trailing newline at the end of
    the file does not abort the load.

    Args:
        file_path: Path to the JSONL file. The file is read as UTF-8.

    Returns:
        A list with one parsed JSON value per non-blank line. An empty file yields
        an empty list.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
        ValueError: If a line is not valid JSON and a "row_id" value could be
            recovered from it; the message carries the 1-based line number, the
            offending line and the recovered row_id token.
        json.JSONDecodeError: If a line is not valid JSON and no "row_id" value
            could be recovered from it (a subclass of ValueError).
    """
    data: List[Dict[str, Any]] = []
    with open(file_path, "r", encoding="utf-8") as f:
        # Count from 1 so reported positions match what editors and tools show.
        for line_number, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            try:
                data.append(json.loads(stripped))
            except json.JSONDecodeError as e:
                print(f"Error parsing JSON line for file {file_path} at line {line_number}")
                # Best effort: the line is not valid JSON, so pull the row_id out
                # textually to help the caller locate the bad record.
                row_id_match = _ROW_ID_PATTERN.search(stripped)
                if row_id_match is not None:
                    raise ValueError(
                        f"{e.msg} at line {line_number}: {stripped} (row_id: {row_id_match.group(1)})"
                    ) from e
                # No row_id to add; re-raise the original error with its traceback intact.
                raise
    return data