11import json
22import os
3- import shutil
4- import tempfile
3+ import time
54from datetime import datetime , timezone
5+ from pathlib import Path
66from typing import TYPE_CHECKING , List , Optional
77
88from eval_protocol .common_utils import load_jsonl
99from eval_protocol .dataset_logger .dataset_logger import DatasetLogger
1010from eval_protocol .dataset_logger .directory_utils import find_eval_protocol_datasets_dir
11+ from eval_protocol .singleton_lock import acquire_singleton_lock , release_singleton_lock
1112
1213if TYPE_CHECKING :
1314 from eval_protocol .models import EvaluationRow
1415
1516
1617class LocalFSDatasetLoggerAdapter (DatasetLogger ):
1718 """
18- Logger that stores logs in the local filesystem.
19+ Logger that stores logs in the local filesystem with file locking to prevent race conditions .
1920 """
2021
2122 def __init__ (self ):
@@ -39,6 +40,44 @@ def current_jsonl_path(self) -> str:
3940 """
4041 return os .path .join (self .datasets_dir , f"{ self .current_date } .jsonl" )
4142
def _acquire_file_lock(self, file_path: str, timeout: float = 30.0) -> bool:
    """
    Acquire a lock for a specific file using the singleton lock mechanism.

    The lock is named after the file's basename and is requested in the
    file's parent directory. Acquisition is retried roughly every 0.1
    seconds until it succeeds or the timeout elapses.

    Args:
        file_path: Path to the file to lock
        timeout: Maximum time to wait for lock acquisition in seconds. A
            value of zero or less still makes one attempt, without waiting.

    Returns:
        True if lock was acquired, False if timeout occurred
    """
    # Create a lock name based on the file path
    lock_name = f"file_lock_{os.path.basename(file_path)}"
    base_dir = Path(os.path.dirname(file_path))

    # Use the monotonic clock so wall-clock adjustments (NTP, DST, manual
    # changes) can neither stretch nor cut short the wait.
    deadline = time.monotonic() + timeout
    while True:
        # A None result is treated as "lock acquired"; anything else means
        # the lock is currently held elsewhere and we should retry.
        if acquire_singleton_lock(base_dir, lock_name) is None:
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False

        # Never sleep past the deadline.
        time.sleep(min(0.1, remaining))
69+
def _release_file_lock(self, file_path: str) -> None:
    """
    Release the singleton lock associated with a file.

    The lock is identified by the file's basename (prefixed with
    ``file_lock_``) inside the file's parent directory.

    Args:
        file_path: Path to the file to unlock
    """
    # os.path.split yields the same (dirname, basename) pair in one call.
    parent_dir, file_name = os.path.split(file_path)
    release_singleton_lock(Path(parent_dir), f"file_lock_{file_name}")
80+
4281 def log (self , row : "EvaluationRow" ) -> None :
4382 """Log a row, updating existing row with same ID or appending new row."""
4483 row_id = row .input_metadata .row_id
@@ -49,25 +88,35 @@ def log(self, row: "EvaluationRow") -> None:
4988 if filename .endswith (".jsonl" ):
5089 file_path = os .path .join (self .datasets_dir , filename )
5190 if os .path .exists (file_path ):
52- with open (file_path , "r" ) as f :
53- lines = f .readlines ()
54-
55- # Find the line with matching ID
56- for i , line in enumerate (lines ):
91+ if self ._acquire_file_lock (file_path ):
5792 try :
58- line_data = json .loads (line .strip ())
59- if line_data ["input_metadata" ]["row_id" ] == row_id :
60- # Update existing row
61- lines [i ] = row .model_dump_json (exclude_none = True ) + os .linesep
62- with open (file_path , "w" ) as f :
63- f .writelines (lines )
64- return
65- except json .JSONDecodeError :
66- continue
93+ with open (file_path , "r" ) as f :
94+ lines = f .readlines ()
95+
96+ # Find the line with matching ID
97+ for i , line in enumerate (lines ):
98+ try :
99+ line_data = json .loads (line .strip ())
100+ if line_data ["input_metadata" ]["row_id" ] == row_id :
101+ # Update existing row
102+ lines [i ] = row .model_dump_json (exclude_none = True ) + os .linesep
103+ with open (file_path , "w" ) as f :
104+ f .writelines (lines )
105+ return
106+ except json .JSONDecodeError :
107+ continue
108+ finally :
109+ self ._release_file_lock (file_path )
67110
68111 # If no existing row found, append new row to current file
69- with open (self .current_jsonl_path , "a" ) as f :
70- f .write (row .model_dump_json (exclude_none = True ) + os .linesep )
112+ if self ._acquire_file_lock (self .current_jsonl_path ):
113+ try :
114+ with open (self .current_jsonl_path , "a" ) as f :
115+ f .write (row .model_dump_json (exclude_none = True ) + os .linesep )
116+ finally :
117+ self ._release_file_lock (self .current_jsonl_path )
118+ else :
119+ raise RuntimeError (f"Failed to acquire lock for log file { self .current_jsonl_path } " )
71120
72121 def read (self , row_id : Optional [str ] = None ) -> List ["EvaluationRow" ]:
73122 """Read rows from all JSONL files in the datasets directory. Also
@@ -82,14 +131,18 @@ def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]:
82131 for filename in os .listdir (self .datasets_dir ):
83132 if filename .endswith (".jsonl" ):
84133 file_path = os .path .join (self .datasets_dir , filename )
85- data = load_jsonl (file_path )
86- for r in data :
87- row = EvaluationRow (** r )
88- if row .input_metadata .row_id not in existing_row_ids :
89- existing_row_ids .add (row .input_metadata .row_id )
90- else :
91- raise ValueError (f"Duplicate Row ID { row .input_metadata .row_id } already exists" )
92- all_rows .append (row )
134+ if self ._acquire_file_lock (file_path ):
135+ try :
136+ data = load_jsonl (file_path )
137+ for r in data :
138+ row = EvaluationRow (** r )
139+ if row .input_metadata .row_id not in existing_row_ids :
140+ existing_row_ids .add (row .input_metadata .row_id )
141+ else :
142+ raise ValueError (f"Duplicate Row ID { row .input_metadata .row_id } already exists" )
143+ all_rows .append (row )
144+ finally :
145+ self ._release_file_lock (file_path )
93146
94147 if row_id :
95148 # Filter by row_id if specified
0 commit comments