From 205901ea3844486fa0088c0d09b8fe953937c151 Mon Sep 17 00:00:00 2001 From: Kaden Gruizenga Date: Thu, 21 Aug 2025 22:58:24 -0400 Subject: [PATCH] fix(core): implement missing API functions used by CLI and __init__; add GetModelForEncoding alias; expose GetValidModels/GetValidEncodings; return ordered model mappings; bump to 1.8.1 --- PyTokenCounter/core.py | 36 +++++++++++++++++++++++++++++++----- pyproject.toml | 2 +- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/PyTokenCounter/core.py b/PyTokenCounter/core.py index f42c8af..b235158 100644 --- a/PyTokenCounter/core.py +++ b/PyTokenCounter/core.py @@ -31,7 +31,7 @@ import tiktoken -from .progress import _InitializeTask, _UpdateTask, _tasks +from .progress import _InitializeTask, _tasks, _UpdateTask MODEL_MAPPINGS = { "gpt-4o": "o200k_base", @@ -281,8 +281,6 @@ } - - def GetModelMappings() -> OrderedDict: """ Get the mappings between models and their encodings. @@ -302,6 +300,9 @@ def GetModelMappings() -> OrderedDict: """ + return OrderedDict(MODEL_MAPPINGS) + + def GetModelForEncodingName(encodingName: str) -> list[str] | str: """ Get the model name for a given encoding. @@ -354,6 +355,13 @@ def GetModelForEncodingName(encodingName: str) -> list[str] | str: return sorted(modelMatches) +def GetModelForEncoding(encodingName: str) -> list[str] | str: + """ + Alias of GetModelForEncodingName for backward compatibility. + """ + return GetModelForEncodingName(encodingName) + + def GetEncodingForModel(modelName: str, quiet: bool = False) -> tiktoken.Encoding: """ Get the encoding for a given model name. @@ -436,6 +444,26 @@ def GetEncodingNameForModel(modelName: str, quiet: bool = False) -> str: return MODEL_MAPPINGS[modelName] +def GetValidModels() -> list[str]: + """ + List all valid model names. + """ + return list(VALID_MODELS) + + +def GetValidEncodings() -> list[str]: + """ + List all unique valid encoding names. + """ + seen: set[str] = set() + unique: list[str] = [] + for enc in VALID_ENCODINGS: + if enc not in seen: + seen.add(enc) + unique.append(enc) + return unique + + def GetEncoding( model: str | None = None, encodingName: str | None = None, @@ -1081,5 +1109,3 @@ def GetNumTokenStr( ) return len(tokens) - - diff --git a/pyproject.toml b/pyproject.toml index ddd2178..33cc56e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "PyTokenCounter" -version = "1.8.0" +version = "1.8.1" description = "A Python library for tokenizing text and counting tokens using various encoding schemes." readme = {file = "README.md", content-type = "text/markdown"} requires-python = ">=3.11"