Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions qlib/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,8 +779,8 @@ def period_feature(self, instrument, field, start_index, end_index, cur_time, pe
if not field.endswith("_q") and not field.endswith("_a"):
raise ValueError("period field must ends with '_q' or '_a'")
quarterly = field.endswith("_q")
index_path = C.dpm.get_data_uri() / "financial" / instrument.lower() / f"{field}.index"
data_path = C.dpm.get_data_uri() / "financial" / instrument.lower() / f"{field}.data"
index_path = C.dpm.get_data_uri() / "financial" / instrument / f"{field}.index"
data_path = C.dpm.get_data_uri() / "financial" / instrument / f"{field}.data"
if not (index_path.exists() and data_path.exists()):
raise FileNotFoundError("No file is found.")
# NOTE: The most significant performance loss is here.
Expand Down
4 changes: 3 additions & 1 deletion qlib/data/storage/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,9 @@ class FileFeatureStorage(FileStorageMixin, FeatureStorage):
def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict = None, **kwargs):
super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
self._provider_uri = None if provider_uri is None else C.DataPathManager.format_provider_uri(provider_uri)
self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
# NOTE: instrument case is normalized by code_to_fname() before reaching here.
# freq/field are also normalized to lowercase for path consistency.
self.file_name = f"{instrument}/{field.lower()}.{freq.lower()}.bin"

def clear(self):
with self.uri.open("wb") as _:
Expand Down
14 changes: 10 additions & 4 deletions qlib/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,10 +916,16 @@ def code_to_fname(code: str):
replace_names += [f"LPT{i}" for i in range(10)]

prefix = "_qlib_"
if str(code).upper() in replace_names:
code = prefix + str(code)

return code
code = str(code)
if code.upper() in replace_names:
code = prefix + code

# Normalize to lowercase for case-insensitive file paths.
# All file-based storage (FileFeatureStorage, FileInstrumentStorage, etc.)
# assumes lowercase paths internally, but not all callers pre-normalize.
# Centralizing the normalization here prevents path mismatches on
# case-sensitive filesystems (e.g., Linux ext4).
return code.lower()


def fname_to_code(fname: str):
Expand Down
4 changes: 2 additions & 2 deletions scripts/data_collector/yahoo/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def download_index_data(self):
logger.warning(f"get {_index_name} error: {e}")
continue
df.columns = ["date", "open", "close", "high", "low", "volume", "money", "change"]
df["date"] = pd.to_datetime(df["date"])
df["date"] = pd.to_datetime(df["date"], format="mixed")
df = df.astype(float, errors="ignore")
df["adjclose"] = df["close"]
df["symbol"] = f"sh{_index_code}"
Expand Down Expand Up @@ -392,7 +392,7 @@ def normalize_yahoo(
columns = copy.deepcopy(YahooNormalize.COLUMNS)
df = df.copy()
df.set_index(date_field_name, inplace=True)
df.index = pd.to_datetime(df.index)
df.index = pd.to_datetime(df.index, format="mixed")
df.index = df.index.tz_localize(None)
df = df[~df.index.duplicated(keep="first")]
if calendar_list is not None:
Expand Down