1- from typing import List , Optional , Protocol , runtime_checkable
1+ from typing import List , Optional , Protocol
22from uuid import UUID
33
44from dstack ._internal .core .models .logs import (
2121logger = get_logger (__name__ )
2222
2323
24- # Check if elasticsearch is available (optional for ship-only mode)
2524ELASTICSEARCH_AVAILABLE = True
2625try :
2726 from elasticsearch import Elasticsearch
2827 from elasticsearch .exceptions import ApiError , TransportError
2928except ImportError :
3029 ELASTICSEARCH_AVAILABLE = False
3130else :
32-
33- # Catch both API errors and transport/connection errors
3431 ElasticsearchError : tuple = (ApiError , TransportError ) # type: ignore[misc]
3532
3633 class ElasticsearchReader :
@@ -51,9 +48,7 @@ def __init__(
5148 try :
5249 self ._client .info ()
5350 except ElasticsearchError as e :
54- raise LogStorageError (
55- f"Failed to connect to Elasticsearch/OpenSearch: { e } "
56- ) from e
51+ raise LogStorageError (f"Failed to connect to Elasticsearch/OpenSearch: { e } " ) from e
5752
5853 def read (
5954 self ,
@@ -82,12 +77,16 @@ def read(
8277 search_params : dict = {
8378 "index" : self ._index ,
8479 "query" : query ,
85- "sort" : [{"@timestamp" : {"order" : sort_order }}],
80+ "sort" : [
81+ {"@timestamp" : {"order" : sort_order }},
82+ {"_id" : {"order" : sort_order }},
83+ ],
8684 "size" : request .limit ,
8785 }
8886
8987 if request .next_token :
90- search_params ["search_after" ] = [request .next_token ]
88+ parts = request .next_token .split (":" , 1 )
89+ search_params ["search_after" ] = [parts [0 ], parts [1 ]]
9190
9291 try :
9392 response = self ._client .search (** search_params )
@@ -97,7 +96,7 @@ def read(
9796
9897 hits = response .get ("hits" , {}).get ("hits" , [])
9998 logs = []
100- last_sort_value = None
99+ last_sort_values = None
101100
102101 for hit in hits :
103102 source = hit .get ("_source" , {})
@@ -108,9 +107,7 @@ def read(
108107 from datetime import datetime
109108
110109 try :
111- timestamp = datetime .fromisoformat (
112- timestamp_str .replace ("Z" , "+00:00" )
113- )
110+ timestamp = datetime .fromisoformat (timestamp_str .replace ("Z" , "+00:00" ))
114111 except ValueError :
115112 continue
116113 else :
@@ -125,12 +122,12 @@ def read(
125122 )
126123
127124 sort_values = hit .get ("sort" )
128- if sort_values :
129- last_sort_value = sort_values [ 0 ]
125+ if sort_values and len ( sort_values ) >= 2 :
126+ last_sort_values = sort_values
130127
131128 next_token = None
132- if len (logs ) == request .limit and last_sort_value is not None :
133- next_token = str ( last_sort_value )
129+ if len (logs ) == request .limit and last_sort_values is not None :
130+ next_token = f" { last_sort_values [ 0 ] } : { last_sort_values [ 1 ] } "
134131
135132 return JobSubmissionLogs (
136133 logs = logs ,
@@ -149,29 +146,13 @@ def close(self) -> None:
149146 FLUENTBIT_AVAILABLE = False
150147else :
151148
class FluentBitWriter(Protocol):
    """Structural interface for components that ship log records to Fluent-bit."""

    def write(self, tag: str, records: List[dict]) -> None:
        """Send a batch of log records under the given Fluent-bit tag."""
        ...

    def close(self) -> None:
        """Release any underlying transport resources."""
        ...
159152
160- def close (self ) -> None :
161- """Close any resources."""
162- ...
163-
class LogReader(Protocol):
    """Structural interface for backends that poll stored job logs."""

    def read(self, stream_name: str, request: PollLogsRequest) -> JobSubmissionLogs:
        """Return logs for the stream, honoring the request's paging fields."""
        ...

    def close(self) -> None:
        """Release any underlying client resources."""
        ...
175156
176157 class HTTPFluentBitWriter :
177158 """Writes logs to Fluent-bit via HTTP POST."""
@@ -183,11 +164,21 @@ def __init__(self, host: str, port: int) -> None:
183164 def write (self , tag : str , records : List [dict ]) -> None :
184165 for record in records :
185166 try :
186- self ._client .post (
167+ response = self ._client .post (
187168 f"{ self ._endpoint } /{ tag } " ,
188169 json = record ,
189170 headers = {"Content-Type" : "application/json" },
190171 )
172+ response .raise_for_status ()
173+ except httpx .HTTPStatusError as e :
174+ logger .error (
175+ "Fluent-bit HTTP request failed with status %d: %s" ,
176+ e .response .status_code ,
177+ e .response .text ,
178+ )
179+ raise LogStorageError (
180+ f"Fluent-bit HTTP error: status { e .response .status_code } "
181+ ) from e
191182 except httpx .HTTPError as e :
192183 logger .error ("Failed to write log to Fluent-bit via HTTP: %s" , e )
193184 raise LogStorageError (f"Fluent-bit HTTP error: { e } " ) from e
@@ -257,7 +248,6 @@ def __init__(
257248 else :
258249 raise LogStorageError (f"Unsupported Fluent-bit protocol: { protocol } " )
259250
260- # Initialize reader based on configuration (Dependency Inversion Principle)
261251 self ._reader : LogReader
262252 if es_host :
263253 if not ELASTICSEARCH_AVAILABLE :
0 commit comments