88 3. Per-Worker Stats — 各 worker 缓存利用排名
99
1010用法:
11- python3 stat_cache_hitrate.py <log_file> [--tail N|2k|30m|2h|1d ] [--output DIR]
11+ python3 stat_cache_hitrate.py <log_file> [--tail N|Nk|Nw ] [--output DIR]
1212"""
1313
1414import argparse
1515import json
16- import math
1716import os
1817import re
1918import subprocess
2827from chart import render_bar , render_sparkline , render_table
2928from log_parser import (
3029 complete_time_arg ,
31- extract_ts ,
3230 filter_file_by_time_range ,
3331 parse_cache_strategy_line ,
3432 parse_stats_line ,
@@ -172,16 +170,10 @@ def count_lines(filepath):
def read_lines(filepath, tail=None):
    """Read lines from a log file, optionally only its last ``tail`` lines.

    Args:
        filepath: Path of the log file to read.
        tail: Number of trailing lines to read (anything ``int()`` accepts),
            or None to read the whole file.

    Returns:
        A list of lines. In tail mode the lines come from ``str.splitlines``
        and therefore carry no trailing newline; an empty list is returned
        when the ``tail`` command exits with a non-zero status. Without
        ``tail`` the result is whatever ``_read_file_lines`` returns.

    Raises:
        ValueError: If ``tail`` cannot be converted to an int.
    """
    if tail is None:
        return _read_file_lines(filepath)

    # Line-count tail: delegate to the system `tail` so large files are not
    # loaded into memory in full.
    n = int(tail)
    # "--" ends option parsing, so a path that starts with "-" is treated as
    # a file name instead of being interpreted as a flag.
    result = subprocess.run(
        ["tail", "-n", str(n), "--", filepath],
        capture_output=True,
        text=True,
    )
    return result.stdout.splitlines() if result.returncode == 0 else []
186178
187179
@@ -190,35 +182,6 @@ def _read_file_lines(filepath):
190182 return f .readlines ()
191183
192184
193- def _filter_by_time (lines , minutes ):
194- """过滤最近 N 分钟的日志行。"""
195- # 找最后一行的时间戳作为基准
196- last_ts = None
197- for line in reversed (lines ):
198- ts = extract_ts (line )
199- if ts :
200- last_ts = parse_ts (ts )
201- break
202- if not last_ts :
203- return lines
204-
205- from datetime import timedelta
206-
207- cutoff = last_ts - timedelta (minutes = minutes )
208- result = []
209- for line in lines :
210- ts = extract_ts (line )
211- if ts :
212- try :
213- if parse_ts (ts ) >= cutoff :
214- result .append (line )
215- except ValueError :
216- result .append (line )
217- else :
218- result .append (line )
219- return result
220-
221-
222185# ════════════════════════════════════════════════════════════════
223186# Phase 2: 日志提取与解析
224187# ════════════════════════════════════════════════════════════════
@@ -237,7 +200,7 @@ def grep_and_parse(filepath, grep_pattern, parse_cmd, tail=None):
237200 """大文件模式:grep 过滤 + log_parser.py CLI 管道解析。"""
238201 parser_path = os .path .join (os .path .dirname (os .path .abspath (__file__ )), "log_parser.py" )
239202
240- if tail and not ( isinstance ( tail , str ) and tail . endswith ( "m" )) :
203+ if tail :
241204 grep_cmd = f"tail -n { tail } { _shell_quote (filepath )} | grep -F { _shell_quote (grep_pattern )} | python3 { _shell_quote (parser_path )} { parse_cmd } "
242205 else :
243206 grep_cmd = f"grep -F { _shell_quote (grep_pattern )} { _shell_quote (filepath )} | python3 { _shell_quote (parser_path )} { parse_cmd } "
@@ -255,7 +218,7 @@ def grep_and_parse(filepath, grep_pattern, parse_cmd, tail=None):
255218
256219def grep_count (filepath , grep_pattern , tail = None ):
257220 """大文件模式:grep 计数。"""
258- if tail and not ( isinstance ( tail , str ) and tail . endswith ( "m" )) :
221+ if tail :
259222 cmd = f"tail -n { tail } { _shell_quote (filepath )} | grep -cE { _shell_quote (grep_pattern )} "
260223 else :
261224 cmd = f"grep -cE { _shell_quote (grep_pattern )} { _shell_quote (filepath )} "
@@ -283,7 +246,7 @@ def extract_data(filepath, tail=None):
283246 strategy_recs = grep_and_parse (filepath , STRATEGY_PATTERN , "parse-cache-strategy" , tail )
284247 stats_recs = grep_and_parse (filepath , STATS_PATTERN , "parse-stats" , tail )
285248 inference_count = grep_count (filepath , r"\] \[POST\] /v1/chat/completions |\] \[POST\] /v1/completions " , tail )
286- line_count = int (tail ) if tail is not None and not ( isinstance ( tail , str ) and tail . endswith ( "m" )) else total
249+ line_count = int (tail ) if tail is not None else total
287250 return strategy_recs , stats_recs , inference_count , line_count
288251
289252
@@ -989,7 +952,7 @@ def parse_args():
989952 "--tail" ,
990953 nargs = "?" ,
991954 const = "2000" ,
992- help = "只分析尾部数据(支持 2000/2k 行,或 30m/2h/1d 时间窗口) " ,
955+ help = "只分析尾部数据(支持 2000、1k、1w 等行数写法)。按时间请使用 --start/--end " ,
993956 )
994957 parser .add_argument (
995958 "--output" , default = None , help = "详细报告输出目录(默认:skill_output/stat-cache-hitrate/<timestamp>/)"
@@ -1002,42 +965,28 @@ def parse_args():
1002965
1003966
def parse_tail_arg(tail_str):
    """Parse the ``--tail`` argument into a positive line count.

    Accepts a plain number (``"2000"``) or a number followed by a ``k``
    (x1,000) or ``w`` (x10,000) suffix. Surrounding whitespace and letter
    case are ignored. Time windows are not supported here.

    Args:
        tail_str: Raw ``--tail`` value, or None when the option was not given.

    Returns:
        The number of trailing lines as an int, or None if ``tail_str`` is
        None.

    Raises:
        ValueError: If the value is empty, does not match a supported format,
            or resolves to a line count that is not greater than zero.
    """
    if tail_str is None:
        return None

    s = str(tail_str).strip().lower()
    if not s:
        raise ValueError("--tail 不能为空")

    m = re.fullmatch(r"(\d+)([kw])?", s)
    if not m:
        raise ValueError("不支持的 --tail 格式:请使用 2000、1k、1w 等行数写法。按时间请改用 --start/--end")

    # A missing suffix (group 2 is None) means the number is already a line count.
    multipliers = {"k": 1000, "w": 10000}
    value = int(m.group(1)) * multipliers.get(m.group(2), 1)

    if value <= 0:
        raise ValueError("--tail 行数必须 > 0")
    return value
1041990
1042991
1043992def main ():
0 commit comments