98 changes: 29 additions & 69 deletions gguf.py → gguf_parser.py
@@ -3,100 +3,56 @@
 import struct
 import warnings
 import numpy as np
+from gguf import GGMLQuantizationType, GGML_QUANT_SIZES, GGUFValueType
+from gguf.quants import dequantize as gguf_dequantize
 
-GGML_TYPES = {
-    "F32": 0,
-    "Q4_0": 2,
-    "Q5_0": 6,
-    "Q8_0": 8,
-    "Q2_K": 10,
-    "Q3_K": 11,
-    "Q4_K": 12,
-    "Q5_K": 13,
-    "Q6_K": 14,
-}
-
-GGML_NAMES = {ggml_type: name for name, ggml_type in GGML_TYPES.items()}
+GGML_TYPES = {enum_name: enum_value.value for enum_name, enum_value in GGMLQuantizationType.__members__.items()}
 
-GGML_BLOCK_SIZES = {
-    "F32": 4,
-    "Q4_0": 2 + 16,
-    "Q5_0": 2 + 4 + 16,
-    "Q8_0": 2 + 32,
-    "Q2_K": 256 // 16 + 256 // 4 + 2 + 2,
-    "Q3_K": 256 // 8 + 256 // 4 + 12 + 2,
-    "Q4_K": 2 + 2 + 12 + 256 // 2,
-    "Q5_K": 2 + 2 + 12 + 256 // 8 + 256 // 2,
-    "Q6_K": 256 // 2 + 256 // 4 + 256 // 16 + 2,
-}
+GGML_NAMES = {ggml_type: name for name, ggml_type in GGML_TYPES.items()}
 
-GGML_ELEMENTS_PER_BLOCK = {
-    "F32": 1,
-    "Q4_0": 32,
-    "Q5_0": 32,
-    "Q8_0": 32,
-    "Q2_K": 256,
-    "Q3_K": 256,
-    "Q4_K": 256,
-    "Q5_K": 256,
-    "Q6_K": 256,
-}
+GGML_BLOCK_SIZES, GGML_ELEMENTS_PER_BLOCK = ({quant_name: GGML_QUANT_SIZES[GGMLQuantizationType[quant_name]][i] for quant_name in GGML_TYPES} for i in [1, 0])

-DATA_TYPES = {
-    "uint8": 0,
-    "int8": 1,
-    "uint16": 2,
-    "int16": 3,
-    "uint32": 4,
-    "int32": 5,
-    "float32": 6,
-    "bool": 7,
-    "string": 8,
-    "array": 9,
-    "uint64": 10,
-    "int64": 11,
-    "float64": 12,
-}
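Note: the deleted hand-written tables line up with what the gguf package exports. A minimal sanity-check sketch, assuming GGML_QUANT_SIZES maps each quantization type to an (elements_per_block, bytes_per_block) tuple (which is why the generator expression above indexes [1] for block sizes and [0] for elements), and that GGUFValueType uses the same integer codes as the deleted DATA_TYPES table:

from gguf import GGML_QUANT_SIZES, GGMLQuantizationType, GGUFValueType

elements, nbytes = GGML_QUANT_SIZES[GGMLQuantizationType.Q4_0]
assert elements == 32      # matches the old GGML_ELEMENTS_PER_BLOCK["Q4_0"]
assert nbytes == 2 + 16    # matches the old GGML_BLOCK_SIZES["Q4_0"]
assert GGUFValueType.STRING == 8 and GGUFValueType.ARRAY == 9  # old DATA_TYPES codes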

 def read_value(f, data_type):
-    if data_type == DATA_TYPES["string"]:
+    if data_type == GGUFValueType.STRING:
         length = struct.unpack("<Q", f.read(8))[0]
         return f.read(length).decode("utf-8")
 
-    elif data_type == DATA_TYPES["uint32"]:
+    elif data_type == GGUFValueType.UINT32:
         return struct.unpack("<I", f.read(4))[0]
 
-    elif data_type == DATA_TYPES["uint64"]:
+    elif data_type == GGUFValueType.UINT64:
         return struct.unpack("<Q", f.read(8))[0]
 
-    elif data_type == DATA_TYPES["int64"]:
+    elif data_type == GGUFValueType.INT64:
         return struct.unpack("<q", f.read(8))[0]
 
-    elif data_type == DATA_TYPES["int32"]:
+    elif data_type == GGUFValueType.INT32:
         return struct.unpack("<i", f.read(4))[0]
 
-    elif data_type == DATA_TYPES["float32"]:
+    elif data_type == GGUFValueType.FLOAT32:
         return struct.unpack("<f", f.read(4))[0]
 
-    elif data_type == DATA_TYPES["float64"]:
+    elif data_type == GGUFValueType.FLOAT64:
         return struct.unpack("<d", f.read(8))[0]
 
-    elif data_type == DATA_TYPES["bool"]:
+    elif data_type == GGUFValueType.BOOL:
         return struct.unpack("<?", f.read(1))[0]
 
-    elif data_type == DATA_TYPES["uint8"]:
+    elif data_type == GGUFValueType.UINT8:
         return struct.unpack("<B", f.read(1))[0]
 
-    elif data_type == DATA_TYPES["int8"]:
+    elif data_type == GGUFValueType.INT8:
         return struct.unpack("<b", f.read(1))[0]
 
-    elif data_type == DATA_TYPES["uint16"]:
+    elif data_type == GGUFValueType.UINT16:
         return struct.unpack("<H", f.read(2))[0]
 
-    elif data_type == DATA_TYPES["int16"]:
+    elif data_type == GGUFValueType.INT16:
         return struct.unpack("<h", f.read(2))[0]
 
-    elif data_type == DATA_TYPES["array"]:
+    elif data_type == GGUFValueType.ARRAY:
         data_type, count = struct.unpack("<IQ", f.read(4 + 8))
         return [read_value(f, data_type) for _ in range(count)]
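Note: GGUFValueType is an IntEnum in the gguf package, so the raw uint32 codes unpacked from the file compare equal to the enum members and no lookup table is needed. A quick self-contained check of read_value against an in-memory stream (a sketch):

import io
import struct

buf = io.BytesIO(struct.pack("<I", 42))
assert read_value(buf, GGUFValueType.UINT32) == 42

encoded = "llama".encode("utf-8")
buf = io.BytesIO(struct.pack("<Q", len(encoded)) + encoded)
assert read_value(buf, GGUFValueType.STRING) == "llama"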

@@ -113,19 +69,19 @@ def load_gguf(f):

     info = {}
     for _ in range(n_kv):
-        name = read_value(f, DATA_TYPES["string"])
+        name = read_value(f, GGUFValueType.STRING)
 
         data_type = struct.unpack("<I", f.read(4))[0]
 
         info[name] = read_value(f, data_type)
 
     tensorinfo = {}
     for _ in range(n_tensors):
-        name = read_value(f, DATA_TYPES["string"])
-        shape_len = read_value(f, DATA_TYPES["uint32"])
-        shape = [read_value(f, DATA_TYPES["uint64"]) for _ in range(shape_len)]
-        ggml_type = read_value(f, DATA_TYPES["uint32"])
-        bad_offset = read_value(f, DATA_TYPES["uint64"])
+        name = read_value(f, GGUFValueType.STRING)
+        shape_len = read_value(f, GGUFValueType.UINT32)
+        shape = [read_value(f, GGUFValueType.UINT64) for _ in range(shape_len)]
+        ggml_type = read_value(f, GGUFValueType.UINT32)
+        bad_offset = read_value(f, GGUFValueType.UINT64)
 
         tensorinfo[name] = {
             "ggml_type": ggml_type,
@@ -434,14 +390,18 @@ def load_gguf_tensor(f, tensorinfo, name):
     block_size = GGML_BLOCK_SIZES[ggml_name]
     elements_per_block = GGML_ELEMENTS_PER_BLOCK[ggml_name]
     dequantize = GGML_DEQUANTIZE[ggml_name]
 
     num_elements = np.prod(shape)
 
     f.seek(offset)
 
     size = num_elements * block_size // elements_per_block
     data = f.read(size)
 
     values = dequantize(data)
+    # Cross-check against the reference dequantizer from the gguf package;
+    # gguf.quants.dequantize expects a numpy byte array rather than raw bytes.
+    values_b = gguf_dequantize(np.frombuffer(data, dtype=np.uint8), GGMLQuantizationType(ggml_type))
+    assert np.allclose(values.reshape(-1), values_b.reshape(-1))
 
     return values.reshape(shape[::-1])
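Putting the pieces together, an end-to-end usage sketch (the file name is a placeholder; general.architecture is a standard GGUF metadata key):

with open("model.gguf", "rb") as f:
    info, tensorinfo = load_gguf(f)
    print(info.get("general.architecture"))
    name = next(iter(tensorinfo))        # read the first tensor as a demo
    weights = load_gguf_tensor(f, tensorinfo, name)
    print(name, weights.shape)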

8 changes: 4 additions & 4 deletions test.py
@@ -2,7 +2,7 @@
 import tqdm
 import requests
 import numpy as np
-import gguf
+import gguf_parser
 import time
 import multiprocessing.pool
 from safetensors.torch import load_file
@@ -31,7 +31,7 @@ def test_tensor(args):
     # Note that the file has to be opened for every thread because f.seek and
     # f.read on the same file from multiple threads would cause chaos.
     with open(filename, "r+b") as f:
-        weights = gguf.load_gguf_tensor(f, tensorinfo, name)
+        weights = gguf_parser.load_gguf_tensor(f, tensorinfo, name)
 
     shape = tensorinfo[name]["shape"]

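The comment above is why each worker re-opens the file. A minimal sketch of that pattern, assuming filename and tensorinfo from the surrounding script (pool size and helper name are illustrative):

import multiprocessing.pool

def load_one(name):
    # A fresh handle per task keeps seek/read pairs from interleaving.
    with open(filename, "rb") as f:
        return name, gguf_parser.load_gguf_tensor(f, tensorinfo, name)

with multiprocessing.pool.ThreadPool(8) as pool:
    weights_by_name = dict(pool.map(load_one, list(tensorinfo)))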
@@ -44,7 +44,7 @@
         weights = weights.transpose(0, 2, 1, 3)
         weights = weights.reshape(shape[::-1])
 
-    other_name = gguf.translate_name(name)
+    other_name = gguf_parser.translate_name(name)
 
     expected = state_dict[other_name].float().numpy().astype(np.float32)

@@ -101,7 +101,7 @@ def main():
     #import mmap
     #f = mmap.mmap(f.fileno(), 0)
 
-    info, tensorinfo = gguf.load_gguf(f)
+    info, tensorinfo = gguf_parser.load_gguf(f)
 
     print("gguf metadata")
     for key, value in info.items():