diff --git a/src/ms_ovba/Models/Entities/reference.py b/src/ms_ovba/Models/Entities/reference.py index 68f304f78..05835f1ae 100644 --- a/src/ms_ovba/Models/Entities/reference.py +++ b/src/ms_ovba/Models/Entities/reference.py @@ -32,16 +32,12 @@ def unpack(data: bytes, endien: str) -> Reference: endien_symbol = '<' if endien == 'little' else '>' name = '' offset = 0 - id = struct.unpack_from(endien_symbol + "H", data, offset) + id, size1 = struct.unpack_from(f"{endien_symbol}HI", data, offset) if id == 0x0016: - offset += 2 - size1, = struct.unpack_from(endien_symbol + "I", data, offset) - offset += 4 - format = endien_symbol + size1 + "s" - name, = struct.unpack_from(format, data, offset) - offset += size1 - size2 = struct.unpack_from(endien_symbol + "I", data, offset) - offset += 4 + offset += 6 + format = f"{endien_symbol}{size1}sHI" + name, id, size2 = struct.unpack_from(format, data, offset) + offset += size1 + 6 if size2 != size1 * 2: # raise warning pass diff --git a/src/ms_ovba/Models/Entities/reference_record.py b/src/ms_ovba/Models/Entities/reference_record.py index ee1c7fc67..704684802 100644 --- a/src/ms_ovba/Models/Entities/reference_record.py +++ b/src/ms_ovba/Models/Entities/reference_record.py @@ -22,7 +22,7 @@ def unpack(bytestring: bytes, endien: str) -> ReferenceRecord: ReferenceOriginal ) endien_symbol = '<' if endien == 'little' else '>' - id = struct.unpack(endien_symbol + "H", bytestring) + id, = struct.unpack_from(f"{endien_symbol}H", bytestring, 0) ref: ReferenceRecord if id == 0x000D: ref = ReferenceRegistered.unpack(bytestring, endien) @@ -33,5 +33,5 @@ def unpack(bytestring: bytes, endien: str) -> ReferenceRecord: elif id == 0x0033: ref = ReferenceOriginal.unpack(bytestring, endien) else: - raise Exception("Unknown Reference Type") + raise Exception(f"Unknown Reference Type: {id}") return ref diff --git a/src/ms_ovba/Views/dirStream.py b/src/ms_ovba/Views/dirStream.py index 8d8490736..2eccbb451 100644 --- a/src/ms_ovba/Views/dirStream.py 
@staticmethod
def is_valid(data: bytes) -> bool:
    """
    Report whether ``data`` can be parsed by ``DirStream.from_bytes``.

    Any exception raised while parsing is turned into a ``SyntaxWarning``
    carrying the exception text, and ``False`` is returned instead of
    letting the error propagate.

    :param data: bytes handed unchanged to ``DirStream.from_bytes``.
    :return: ``True`` when parsing raised nothing, ``False`` otherwise.
    """
    try:
        DirStream.from_bytes(data)
    except Exception as error:
        # Surface the reason for the rejection without aborting the caller.
        warnings.warn(str(error), SyntaxWarning)
        return False
    else:
        return True
import io
import struct
from typing import TypeVar


T = TypeVar('T', bound='StructIO')


class StructIO:
    """
    Sequential reader for fixed-width binary fields held in memory.

    The input is wrapped in an ``io.BytesIO`` so every read advances an
    internal cursor. Integers are decoded using the byte order selected
    when the reader is constructed.
    """

    def __init__(self: T, data: bytes, endian: str = 'little') -> None:
        """
        :param data: bytes to read from.
        :param endian: ``'little'`` selects little-endian decoding; any
            other value selects big-endian.
        """
        self._stream = io.BytesIO(data)
        self._sym = '<' if endian == 'little' else '>'

    def read_big_h(self: T) -> int:
        """
        Read one unsigned 16-bit integer (struct code ``H``).

        NOTE: despite the name, the value is decoded with the byte order
        given to the constructor, not necessarily big-endian.

        :raises EOFError: if fewer than 2 bytes remain.
        """
        val, = self._read_fmt('H', 2)
        return val

    def read_big_i(self: T) -> int:
        """
        Read one unsigned 32-bit integer (struct code ``I``).

        NOTE: despite the name, the value is decoded with the byte order
        given to the constructor, not necessarily big-endian.

        :raises EOFError: if fewer than 4 bytes remain.
        """
        # Was ``struct._read_fmt``: the helper lives on this class, not on
        # the ``struct`` module, so the old call raised AttributeError.
        val, = self._read_fmt('I', 4)
        return val

    def read_id_size_val(self: T) -> tuple[int, int, bytes]:
        """
        Read a record laid out as a 16-bit id, a 32-bit size, then
        ``size`` bytes of payload.

        :return: ``(id, size, payload)``.
        :raises EOFError: if the header or the payload is truncated.
        """
        record_id, size = self._read_fmt('HI', 6)
        payload = self._stream.read(size)
        # Apply the same short-read check as the header so a truncated
        # payload is reported instead of being returned silently.
        if len(payload) < size:
            raise EOFError(
                f"Expected {size} bytes, but only got {len(payload)}"
            )
        return (record_id, size, payload)

    def _read_fmt(self: T, fmt: str, size: int) -> tuple:
        """
        Read exactly ``size`` bytes and unpack them with ``fmt``.

        :param fmt: struct format characters without a byte-order prefix;
            the prefix chosen at construction is prepended.
        :param size: number of bytes ``fmt`` consumes.
        :raises EOFError: if fewer than ``size`` bytes remain.
        """
        chunk = self._stream.read(size)
        if len(chunk) < size:
            raise EOFError(f"Expected {size} bytes, but only got {len(chunk)}")
        return struct.unpack(f"{self._sym}{fmt}", chunk)
-00000020 04 00 00 00 09 04 00 00 03 00 02 00 00 00 E4 04 ..............ä. -00000030 04 00 0A 00 00 00 56 42 41 50 72 6F 6A 65 63 74 ......VBAProject -00000040 05 00 00 00 00 00 40 00 00 00 00 00 06 00 00 00 ......@......... -00000050 00 00 3D 00 00 00 00 00 07 00 04 00 00 00 00 00 ..=............. -00000060 00 00 08 00 04 00 00 00 00 00 00 00 09 00 04 00 ................ -00000070 00 00 57 02 BE 65 11 00 0C 00 00 00 00 00 3C 00 ..W.¾e........<. -00000080 00 00 00 00 16 00 06 00 00 00 73 74 64 6F 6C 65 ..........stdole References begin at 84 -00000090 3E 00 0C 00 00 00 73 00 74 00 64 00 6F 00 6C 00 >.....s.t.d.o.l. -000000A0 65 00 0D 00 68 00 00 00 5E 00 00 00 2A 5C 47 7B e...h...^...*\G{ +01 00 04 00 00 00 03 00 00 00 .......... +4A 00 04 00 00 00 03 00 00 00 J......... +02 00 04 00 00 00 09 04 00 00 .......... +14 00 04 00 00 00 09 04 00 00 .......... +03 00 02 00 00 00 E4 04 ......ä. +04 00 0A 00 00 00 56 42 41 50 72 6F 6A 65 63 74 ......VBAProject +05 00 00 00 00 00 40 00 00 00 00 00 ......@..... +06 00 00 00 00 00 3D 00 00 00 00 00 ......=..... +07 00 04 00 00 00 00 00 00 00 .......... +08 00 04 00 00 00 00 00 00 00 .......... +09 00 04 00 00 00 57 02 BE 65 11 00 ......W.¾e.. +0C 00 00 00 00 00 3C 00 00 00 00 00 ......<..... + +16 00 06 00 00 00 73 74 64 6F 6C 65 3E 00 0C 00 ......stdole>... +00 00 73 00 74 00 64 00 6F 00 6C 00 65 00 ..s.t.d.o.l.e. +0D 00 68 00 00 00 5E 00 00 00 2A 5C 47 7B ..h...^...*\G{ 000000B0 30 30 30 32 30 34 33 30 2D 30 30 30 30 2D 30 30 00020430-0000-00 000000C0 30 30 2D 43 30 30 30 2D 30 30 30 30 30 30 30 30 00-C000-00000000 000000D0 30 30 34 36 7D 23 32 2E 30 23 30 23 43 3A 5C 57 0046}#2.0#0#C:\W @@ -27,10 +32,12 @@ 000001A0 4D 53 4F 2E 44 4C 4C 23 4D 69 63 72 6F 73 6F 66 MSO.DLL#Microsof 000001B0 74 20 4F 66 66 69 63 65 20 31 36 2E 30 20 4F 62 t Office 16.0 Ob 000001C0 6A 65 63 74 20 4C 69 62 72 61 72 79 00 00 00 00 ject Library.... -000001D0 00 00 0F 00 02 00 00 00 03 00 13 00 02 00 00 00 ................ 
-000001E0 F3 08 19 00 0C 00 00 00 54 68 69 73 57 6F 72 6B ó.......ThisWork -000001F0 62 6F 6F 6B 47 00 18 00 00 00 54 00 68 00 69 00 bookG.....T.h.i. -00000200 73 00 57 00 6F 00 72 00 6B 00 62 00 6F 00 6F 00 s.W.o.r.k.b.o.o. +000001D0 00 00 0F 00 02 00 00 00 03 00 +13 00 02 00 00 00 F3 08 ......ó. +19 00 0C 00 00 00 54 68 69 73 57 6F 72 6B 62 6F ......ThisWorkbo +6F 6B ok +47 00 18 00 00 00 54 00 68 00 69 00 73 00 57 00 G.....T.h.i.s.W. + 6F 00 72 00 6B 00 62 00 6F 00 6F 00 o.r.k.b.o.o. 00000210 6B 00 1A 00 0C 00 00 00 54 68 69 73 57 6F 72 6B k.......ThisWork 00000220 62 6F 6F 6B 32 00 18 00 00 00 54 00 68 00 69 00 book2.....T.h.i. 00000230 73 00 57 00 6F 00 72 00 6B 00 62 00 6F 00 6F 00 s.W.o.r.k.b.o.o.