From 9e35e32fb7a53ea9fb45665afab9a118e49959f0 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:39:09 +0000 Subject: [PATCH 01/10] update test files submodule --- tests/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data b/tests/data index d2da919..1058d1b 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit d2da9190e74f82caa70a8bcd844c929182a8dc8e +Subproject commit 1058d1bc6df3b7aae389a9faa15b607357caa41f From ba8ae777ab56f8183595767c07b813781522f0ee Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:39:50 +0000 Subject: [PATCH 02/10] update and add test files --- tests/fixtures.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index a099bb8..ca43662 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -13,5 +13,13 @@ def get_data_path_by_name(name): return DATA / "mixed-mode" / "EmptyClass" / "bin" / "EmptyClass_x86.exe" elif name == "EmptyClass_amd64.exe": return DATA / "mixed-mode" / "EmptyClass" / "bin" / "EmptyClass_amd64.exe" + elif name == "1d41308bf4148b4c138f9307abc696a6e4c05a5a89ddeb8926317685abb1c241": + return DATA / "malware" / "1d41308bf4148b4c138f9307abc696a6e4c05a5a89ddeb8926317685abb1c241" + elif name == "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_": + return DATA / "malware" / "387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_" + elif name == "7f4ba9fc95b30baf8922a6933a4ff1c6a7fef41fae487bb31014c4963357770f.dll_": + return DATA / "malware" / "7f4ba9fc95b30baf8922a6933a4ff1c6a7fef41fae487bb31014c4963357770f.dll_" + elif name == "minimal-res.exe": + return DATA / "minimal-resource" / "bin" / "minimal-res.exe" raise ValueError("unknown test file") From 17e4ae94a94ac99bb563a76faf42f12679bbf237 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:41:38 +0000 Subject: [PATCH 03/10] add LazyList tests with help of Github Copilot --- tests/test_utils.py | 96 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index fc625fb..c799fa1 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,6 +2,102 @@ import dnfile.utils +def test_lazy_list_getitem_and_iteration(): + calls = [] + + def eval_func(index, value): + calls.append((index, value)) + return index if value is None else value + + items = dnfile.utils.LazyList(eval_func, 3) + + # Raw list access bypasses LazyList.__getitem__. + assert list.__getitem__(items, 1) is None + + # Accessing through LazyList evaluates and caches the value. + assert items[1] == 1 + assert calls == [(1, None)] + + calls.clear() + # Iteration should evaluate each item exactly once. + assert list(items) == [0, 1, 2] + assert calls == [(0, None), (1, 1), (2, None)] + + +def test_lazy_list_repeated_access(): + calls = [] + + def eval_func(index, value): + calls.append((index, value)) + return index if value is None else value + + items = dnfile.utils.LazyList(eval_func, 2) + + # A second access should see the cached value, not None. + assert items[0] == 0 + assert items[0] == 0 + assert calls == [(0, None), (0, 0)] + + +def test_lazy_list_slice_access(): + calls = [] + + def eval_func(index, value): + calls.append((index, value)) + if isinstance(index, slice): + start = index.start or 0 + return [start + i for i in range(len(value))] + return index if value is None else value + + items = dnfile.utils.LazyList(eval_func, 4) + + # Slice access should evaluate the slice as a single unit. + assert items[1:3] == [1, 2] + assert calls == [(slice(1, 3, None), [None, None])] + # The evaluated slice should be written back into the underlying list. + assert list.__getitem__(items, 1) == 1 + assert list.__getitem__(items, 2) == 2 + + +def test_lazy_list_eval_all(): + calls = [] + + def eval_func(index, value): + calls.append((index, value)) + return index if value is None else value + + items = dnfile.utils.LazyList(eval_func, 3) + + # eval_all() should force evaluation of every item. + items.eval_all() + # Direct list access should now see initialized values. + assert list.__getitem__(items, 0) == 0 + assert list.__getitem__(items, 1) == 1 + assert list.__getitem__(items, 2) == 2 + assert calls == [(0, None), (1, None), (2, None)] + + +def test_lazy_list_truncate_and_repr(): + calls = [] + + def eval_func(index, value): + calls.append((index, value)) + return index if value is None else value + + items = dnfile.utils.LazyList(eval_func, 4) + + # truncate() should shrink the list without forcing evaluation. + items.truncate(2) + assert len(items) == 2 + assert list.__getitem__(items, 0) is None + assert list.__getitem__(items, 1) is None + + calls.clear() + # repr() forces evaluation of the remaining items. + assert repr(items) == "[0, 1]" + assert calls == [(0, None), (1, None)] + + def test_compressed_int(): assert None is dnfile.utils.read_compressed_int(b"") assert None is dnfile.utils.read_compressed_int(None) From 9a4b25168e3209f08958ce41a44f2f98775faf50 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:42:30 +0000 Subject: [PATCH 04/10] more invalid streams tests with help of Github Copilot --- tests/test_invalid_streams.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_invalid_streams.py b/tests/test_invalid_streams.py index 2a48fa0..2f6fb23 100644 --- a/tests/test_invalid_streams.py +++ b/tests/test_invalid_streams.py @@ -12,12 +12,25 @@ def test_duplicate_stream(): assert dn.net.user_strings.get(1).value == "BBBBBBBB" +def test_baseline_streams_still_parse(): + path = fixtures.DATA / "invalid-streams" / "aaaaa.exe" + + dn = dnfile.dnPE(path) + + assert dn.net is not None + assert dn.net.mdtables.Module is not None + assert dn.net.strings.get(1).value == "" + assert dn.net.user_strings is not None + + def test_unknown_stream(): path = fixtures.DATA / "invalid-streams" / "unknown-stream.exe" dn = dnfile.dnPE(path) assert b"#ZZ" in dn.net.metadata.streams + assert dn.net.mdtables.Module is not None + assert dn.net.strings.get(1).value == "" def test_invalid_stream_name(): From c751630fdc77f5838a9f1a2aa6dc9fbceef69c30 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:43:03 +0000 Subject: [PATCH 05/10] malware test file location change --- tests/test_parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parse.py b/tests/test_parse.py index b020a20..d34154f 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -299,7 +299,7 @@ def test_method_params(): def test_ignore_NumberOfRvaAndSizes(): # .NET loaders ignores NumberOfRvaAndSizes, so attempt to parse anyways - path = fixtures.DATA / "1d41308bf4148b4c138f9307abc696a6e4c05a5a89ddeb8926317685abb1c241" + path = fixtures.get_data_path_by_name("1d41308bf4148b4c138f9307abc696a6e4c05a5a89ddeb8926317685abb1c241") if not path.exists(): raise pytest.xfail("test file 1d41308bf41... (DANGER: malware) not found in test fixtures") From 6a539e1d5125bdad855bd176aa9c443d414b738c Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:50:47 +0000 Subject: [PATCH 06/10] add coded index tests --- tests/test_codedindexes.py | 84 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 tests/test_codedindexes.py diff --git a/tests/test_codedindexes.py b/tests/test_codedindexes.py new file mode 100644 index 0000000..f9ca6d5 --- /dev/null +++ b/tests/test_codedindexes.py @@ -0,0 +1,84 @@ +import fixtures + +import dnfile +from dnfile import codedindex +from dnfile.mdtable import AssemblyRow, FieldRow, MemberRefRow, PropertyRow, TypeDefRow, TypeRefRow + + +def _assert_coded_index_target(index, table_name, row_index, row_type): + # Verify both the resolved table metadata and the concrete row type. + assert index.table is not None + assert index.table.name == table_name + assert index.row_index == row_index + assert isinstance(index.row, row_type) + + +class _FakeTable: + # Tiny stand-in for the real table objects used by the resolver. + def __init__(self, name, rows): + self.name = name + self._rows = rows + + # Mirror the lookup API that coded-index resolution expects. + def get_with_row_index(self, row_index): + return self._rows[row_index] + + +def test_coded_indexes_in_hello_world(): + path = fixtures.get_data_path_by_name("hello-world.exe") + + dn = dnfile.dnPE(path) + assert dn.net is not None + + # TypeDef.Extends should point at the TypeRef entry for System.Object. + typedef = dn.net.mdtables.TypeDef[1] + _assert_coded_index_target(typedef.Extends, "TypeRef", 5, TypeRefRow) + + # MemberRef.Class is another TypeRef-backed coded index. + member_ref = dn.net.mdtables.MemberRef[0] + _assert_coded_index_target(member_ref.Class, "TypeRef", 1, TypeRefRow) + + # CustomAttribute rows carry both parent and constructor-type references. + custom_attribute = dn.net.mdtables.CustomAttribute[0] + _assert_coded_index_target(custom_attribute.Parent, "Assembly", 1, AssemblyRow) + _assert_coded_index_target(custom_attribute.Type, "MemberRef", 1, MemberRefRow) + + +def test_coded_indexes_in_module_code(): + path = fixtures.get_data_path_by_name("ModuleCode_x86.exe") + + dn = dnfile.dnPE(path) + assert dn.net is not None + + # Constant.Parent should resolve back to the field that owns the constant. + constant = dn.net.mdtables.Constant[0] + _assert_coded_index_target(constant.Parent, "Field", 53, FieldRow) + + # MethodSemantics.Association links a method semantics row to a property. + method_semantics = dn.net.mdtables.MethodSemantics[0] + _assert_coded_index_target(method_semantics.Association, "Property", 1, PropertyRow) + + # This fixture also exercises a different CustomAttribute parent/type pair. + custom_attribute = dn.net.mdtables.CustomAttribute[2] + _assert_coded_index_target(custom_attribute.Parent, "TypeDef", 2, TypeDefRow) + _assert_coded_index_target(custom_attribute.Type, "MemberRef", 2, MemberRefRow) + + +def test_coded_index_resolution_without_fixture(): + # Use a sentinel so the test can assert identity rather than equality. + sentinel_row = object() + # Minimal table stub for exercising the resolver without a PE fixture. + tables = [_FakeTable("TypeRef", {1: sentinel_row})] + + # Tag 1 selects the TypeRef table in TypeDefOrRef. + resolved = codedindex.TypeDefOrRef((1 << codedindex.TypeDefOrRef.tag_bits) | 1, tables) + assert resolved.table is not None + assert resolved.table.name == "TypeRef" + assert resolved.row_index == 1 + assert resolved.row is sentinel_row + + # An unknown table tag should leave the table unresolved but preserve the row index. + unresolved = codedindex.TypeDefOrRef((2 << codedindex.TypeDefOrRef.tag_bits) | 2, tables) + assert unresolved.table is None + assert unresolved.row_index == 2 + assert unresolved.row is None From 31ea86c9ccdc4764db477c1043fabc6227685f7e Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 22:53:48 +0000 Subject: [PATCH 07/10] add .NET resources tests --- tests/test_resources.py | 101 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 tests/test_resources.py diff --git a/tests/test_resources.py b/tests/test_resources.py new file mode 100644 index 0000000..4e94e90 --- /dev/null +++ b/tests/test_resources.py @@ -0,0 +1,101 @@ +import fixtures + +import dnfile +import pytest +from dnfile.resource import InternalResource, ResourceSet + + +def _resources_by_name(path): + dn = dnfile.dnPE(path) + + assert dn.net is not None + assert dn.net.resources is not None + + return dn, {str(rsrc.name): rsrc for rsrc in dn.net.resources} + + +def test_minimal_resource_fixture_parses(): + path = fixtures.get_data_path_by_name("minimal-res.exe") + + dn, resources = _resources_by_name(path) + + assert len(resources) == 1 + + resource = resources["sample.resources"] + assert isinstance(resource, InternalResource) + assert str(resource.name) == "sample.resources" + assert resource.public is True + assert resource.private is False + assert isinstance(resource.data, ResourceSet) + + resource_set = resource.data + assert resource_set.struct is not None + assert resource_set.struct.Version == 2 + assert resource_set.struct.NumberOfResources == 2 + assert [entry.name for entry in resource_set.entries] == ["Count", "Greeting"] + + count_entry = resource_set.entries[0] + greeting_entry = resource_set.entries[1] + + assert count_entry.type_name == "System.Int32" + assert count_entry.value == 42 + assert greeting_entry.type_name == "System.String" + assert greeting_entry.value == "Hello" + + +def test_mal_resource_fixture_parses_modulo_resources(): + path = fixtures.get_data_path_by_name("387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_") + if not path.exists(): + raise pytest.xfail("test file 38741504... (DANGER: malware) not found in test fixtures") + + dn, resources = _resources_by_name(path) + + assert len(resources) == 13 + + resource = resources["Modulo.g.resources"] + assert isinstance(resource, InternalResource) + assert isinstance(resource.data, ResourceSet) + + resource_set = resource.data + assert resource_set.struct is not None + assert resource_set.struct.Version == 2 + assert resource_set.struct.NumberOfResources == 24 + assert resource_set.entries[0].name == "windowazulso.baml" + assert resource_set.entries[0].type_name == "System.Stream" + assert isinstance(resource_set.entries[0].value, bytes) + assert resource_set.entries[1].name == "resources/logo001.svg" + assert resource_set.entries[1].type_name == "System.Stream" + assert resource_set.entries[1].value.startswith(b" 0 From 037df7e9776b4a8e1442ac0aa05bed0067ad3b81 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 23:33:40 +0000 Subject: [PATCH 08/10] add inline comments --- tests/test_resources.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_resources.py b/tests/test_resources.py index 4e94e90..fab90a1 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -6,6 +6,7 @@ def _resources_by_name(path): + # Normalize the resource list into a name-keyed mapping for direct assertions. dn = dnfile.dnPE(path) assert dn.net is not None @@ -15,6 +16,7 @@ def _resources_by_name(path): def test_minimal_resource_fixture_parses(): + """Verify the minimal fixture preserves one named resource and its decoded values.""" path = fixtures.get_data_path_by_name("minimal-res.exe") dn, resources = _resources_by_name(path) @@ -44,14 +46,18 @@ def test_minimal_resource_fixture_parses(): def test_mal_resource_fixture_parses_modulo_resources(): + """Verify the malware fixture exposes the expected resource set contents.""" path = fixtures.get_data_path_by_name("387f15043f0198fd3a637b0758c2b6dde9ead795c3ed70803426fc355731b173.dll_") + # Skip the fixture when it is not present locally; this malware sample is optional. if not path.exists(): raise pytest.xfail("test file 38741504... (DANGER: malware) not found in test fixtures") dn, resources = _resources_by_name(path) + # This fixture is intentionally richer and should expose many internal resources. assert len(resources) == 13 + # Modulo.g.resources contains both BAML and SVG stream entries. resource = resources["Modulo.g.resources"] assert isinstance(resource, InternalResource) assert isinstance(resource.data, ResourceSet) @@ -69,7 +75,9 @@ def test_mal_resource_fixture_parses_modulo_resources(): def test_mal_resource_fixture_parses_costura_resources(): + """Verify the malware fixture preserves empty, bitmap, and metadata resources.""" path = fixtures.get_data_path_by_name("7f4ba9fc95b30baf8922a6933a4ff1c6a7fef41fae487bb31014c4963357770f.dll_") + # Skip the fixture when it is not present locally; this malware sample is optional. if not path.exists(): raise pytest.xfail("test file 7f4ba9fc... (DANGER: malware) not found in test fixtures") @@ -84,6 +92,7 @@ def test_mal_resource_fixture_parses_costura_resources(): assert empty_resource.data.struct.NumberOfResources == 0 assert empty_resource.data.entries == [] + # Principal.Resources.resources is the main resource bundle to validate. resource = resources["Principal.Resources.resources"] assert isinstance(resource, InternalResource) assert isinstance(resource.data, ResourceSet) From 25ff6079979ecada9ca1c3f62e621e52f3d4f9f0 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 23:37:46 +0000 Subject: [PATCH 09/10] fix lint errors --- tests/test_codedindexes.py | 3 ++- tests/test_resources.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_codedindexes.py b/tests/test_codedindexes.py index f9ca6d5..2d0dabb 100644 --- a/tests/test_codedindexes.py +++ b/tests/test_codedindexes.py @@ -2,7 +2,8 @@ import dnfile from dnfile import codedindex -from dnfile.mdtable import AssemblyRow, FieldRow, MemberRefRow, PropertyRow, TypeDefRow, TypeRefRow +from dnfile.mdtable import (AssemblyRow, FieldRow, MemberRefRow, PropertyRow, + TypeDefRow, TypeRefRow) def _assert_coded_index_target(index, table_name, row_index, row_type): diff --git a/tests/test_resources.py b/tests/test_resources.py index fab90a1..4e00254 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -1,7 +1,7 @@ import fixtures +import pytest import dnfile -import pytest from dnfile.resource import InternalResource, ResourceSet From 9c033c967c55ab50302a04841618ecc9d7164283 Mon Sep 17 00:00:00 2001 From: malwarefrank <42877127+malwarefrank@users.noreply.github.com> Date: Wed, 20 May 2026 23:54:29 +0000 Subject: [PATCH 10/10] fix lint, with correct isort args this time --- tests/test_codedindexes.py | 3 +-- tests/test_resources.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_codedindexes.py b/tests/test_codedindexes.py index 2d0dabb..07627e5 100644 --- a/tests/test_codedindexes.py +++ b/tests/test_codedindexes.py @@ -2,8 +2,7 @@ import dnfile from dnfile import codedindex -from dnfile.mdtable import (AssemblyRow, FieldRow, MemberRefRow, PropertyRow, - TypeDefRow, TypeRefRow) +from dnfile.mdtable import FieldRow, TypeDefRow, TypeRefRow, AssemblyRow, PropertyRow, MemberRefRow def _assert_coded_index_target(index, table_name, row_index, row_type): diff --git a/tests/test_resources.py b/tests/test_resources.py index 4e00254..4164858 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -1,8 +1,8 @@ -import fixtures import pytest +import fixtures import dnfile -from dnfile.resource import InternalResource, ResourceSet +from dnfile.resource import ResourceSet, InternalResource def _resources_by_name(path):