Skip to content

Commit b25eb11

Browse files
committed
Add an end-to-end test for patch advisory creation.
Update get_or_create_advisory_references to store the reference type correctly. Update get_or_create_advisory_package_commit_patches to correctly create or update the patch_text field. Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent a636144 commit b25eb11

3 files changed

Lines changed: 248 additions & 23 deletions

File tree

vulnerabilities/pipes/advisory.py

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from django.db import transaction
2020
from django.db.models import Q
2121
from django.db.models.query import QuerySet
22-
from packageurl import PackageURL
2322
from packageurl.contrib.purl2url import purl2url
2423
from packageurl.contrib.url2purl import url2purl
2524

@@ -46,7 +45,6 @@
4645
from vulnerabilities.models import VulnerabilitySeverity
4746
from vulnerabilities.models import Weakness
4847
from vulnerabilities.pipes.univers_utils import get_exact_purls_v2
49-
from vulnerabilities.utils import is_commit
5048

5149

5250
def get_or_create_aliases(aliases: List) -> QuerySet:
@@ -71,7 +69,9 @@ def get_or_create_advisory_references(references: List) -> List[AdvisoryReferenc
7169
existing_urls = {r.url for r in existing}
7270

7371
to_create = [
74-
AdvisoryReference(reference_id=ref.reference_id, url=ref.url)
72+
AdvisoryReference(
73+
reference_id=ref.reference_id, url=ref.url, reference_type=ref.reference_type
74+
)
7575
for ref in references
7676
if ref.url not in existing_urls
7777
]
@@ -110,11 +110,11 @@ def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWea
110110
def get_or_create_advisory_package_commit_patches(
111111
commit_patches_data: List,
112112
) -> List["PackageCommitPatch"]:
113-
114113
if not commit_patches_data:
115114
return []
116115

117-
pairs = [(c.commit_hash, c.vcs_url) for c in commit_patches_data]
116+
data_map = {(c.commit_hash, c.vcs_url): c for c in commit_patches_data}
117+
pairs = list(data_map.keys())
118118

119119
query = Q()
120120
for commit_hash, vcs_url in pairs:
@@ -123,46 +123,63 @@ def get_or_create_advisory_package_commit_patches(
123123
existing_commits_qs = PackageCommitPatch.objects.filter(query)
124124
existing_pairs = set(existing_commits_qs.values_list("commit_hash", "vcs_url"))
125125

126+
to_update = []
127+
for commit_obj in existing_commits_qs:
128+
key = (commit_obj.commit_hash, commit_obj.vcs_url)
129+
input_data = data_map[key]
130+
131+
if (
132+
commit_obj.patch_checksum != input_data.patch_checksum
133+
or commit_obj.patch_text != input_data.patch_text
134+
):
135+
commit_obj.patch_checksum = input_data.patch_checksum
136+
commit_obj.patch_text = input_data.patch_text
137+
to_update.append(commit_obj)
138+
139+
if to_update:
140+
PackageCommitPatch.objects.bulk_update(to_update, fields=["patch_checksum", "patch_text"])
141+
126142
to_create = [
127143
PackageCommitPatch(
128144
commit_hash=c.commit_hash,
129145
vcs_url=c.vcs_url,
130-
patch_text=getattr(c, "patch_text", None),
146+
patch_checksum=c.patch_checksum,
147+
patch_text=c.patch_text,
131148
)
132149
for c in commit_patches_data
133150
if (c.commit_hash, c.vcs_url) not in existing_pairs
134151
]
135152

136153
if to_create:
137-
PackageCommitPatch.objects.bulk_create(to_create, ignore_conflicts=True)
154+
PackageCommitPatch.objects.bulk_create(to_create)
138155

139156
all_commits = PackageCommitPatch.objects.filter(query)
140157
return list(all_commits)
141158

142159

143160
def get_or_create_advisory_patches(
144-
base_patches_data: List,
161+
patches: List,
145162
) -> List["Patch"]:
146-
if not base_patches_data:
163+
if not patches:
147164
return []
148165

149-
pairs = [(c.patch_url, c.patch_checksum) for c in base_patches_data]
166+
pairs = [(c.patch_text, c.patch_url) for c in patches]
150167

151168
query = Q()
152-
for patch_checksum, patch_url in pairs:
153-
query |= Q(patch_checksum=patch_checksum, patch_url=patch_url)
169+
for patch_text, patch_url in pairs:
170+
query |= Q(patch_text=patch_text, patch_url=patch_url)
154171

155172
existing_commits_qs = Patch.objects.filter(query)
156-
existing_pairs = set(existing_commits_qs.values_list("patch_url", "patch_checksum"))
173+
existing_pairs = set(existing_commits_qs.values_list("patch_text", "patch_url"))
157174

158175
to_create = [
159176
Patch(
160177
patch_url=getattr(c, "patch_url", None),
161178
patch_text=getattr(c, "patch_text", None),
162179
patch_checksum=getattr(c, "patch_checksum", None),
163180
)
164-
for c in base_patches_data
165-
if (c.patch_url, c.patch_checksum) not in existing_pairs
181+
for c in patches
182+
if (c.patch_text, c.patch_url) not in existing_pairs
166183
]
167184

168185
if to_create:
@@ -176,7 +193,13 @@ def get_or_create_advisory_patches(
176193

177194

178195
def classify_patch_source(url, commit_hash, patch_text):
179-
"""Classify a patch as a PackageCommitPatchData or PatchData using provided args."""
196+
"""
197+
Classify the patch type based on the given URL, commit hash, and patch text.
198+
Returns: a (base_purl, patch_obj) tuple, where base_purl is a PackageURL string without a version for supported VCS URLs, and `None` otherwise.
199+
patch_obj is one of: (PackageCommitPatchData for supported VCS URLs with a commit,
200+
PatchData for raw patch text or non-VCS URLs, ReferenceV2 when an unsupported VCS URL is paired with a commit hash)
201+
Returns `None` only when both `url` and `patch_text` are missing.
202+
"""
180203
if not url:
181204
if not patch_text:
182205
return
@@ -254,7 +277,7 @@ def insert_advisory_v2(
254277
aliases = get_or_create_advisory_aliases(aliases=advisory.aliases)
255278
references = get_or_create_advisory_references(references=advisory.references_v2)
256279
severities = get_or_create_advisory_severities(severities=advisory.severities)
257-
patches = get_or_create_advisory_patches(base_patches_data=advisory.patches)
280+
patches = get_or_create_advisory_patches(patches=advisory.patches)
258281
weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses)
259282
content_id = compute_content_id(advisory_data=advisory)
260283

vulnerabilities/tests/pipes/test_vulnerablecode_importer_pipeline_v2.py

Lines changed: 206 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
from vulnerabilities.importer import AffectedPackageV2
2121
from vulnerabilities.importer import PackageCommitPatchData
2222
from vulnerabilities.importer import PatchData
23+
from vulnerabilities.importer import ReferenceV2
24+
from vulnerabilities.models import AdvisoryReference
2325
from vulnerabilities.models import AdvisoryV2
2426
from vulnerabilities.models import ImpactedPackage
2527
from vulnerabilities.models import PackageCommitPatch
2628
from vulnerabilities.models import PackageV2
29+
from vulnerabilities.models import Patch
2730
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
31+
from vulnerabilities.pipes.advisory import classify_patch_source
2832

2933

3034
class DummyImporter(VulnerableCodeBaseImporterPipelineV2):
@@ -57,13 +61,13 @@ def dummy_advisory():
5761
introduced_by_commit_patches=[
5862
PackageCommitPatchData(
5963
commit_hash="9ff29db8ec3adefefce0d37c3c9b5b2c22e59fac",
60-
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
64+
vcs_url="https://github.com/abc/def",
6165
)
6266
],
6367
fixed_by_commit_patches=[
6468
PackageCommitPatchData(
6569
commit_hash="ab99939678dc36b3bee0f366493df1aeef521df4",
66-
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
70+
vcs_url="https://github.com/abc/def",
6771
)
6872
],
6973
),
@@ -74,13 +78,13 @@ def dummy_advisory():
7478
introduced_by_commit_patches=[
7579
PackageCommitPatchData(
7680
commit_hash="9ff29db8ec3adefefce0d37c3c9b5b2c22e59fac",
77-
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
81+
vcs_url="https://github.com/abc/def",
7882
)
7983
],
8084
fixed_by_commit_patches=[
8185
PackageCommitPatchData(
8286
commit_hash="ab99939678dc36b3bee0f366493df1aeef521df4",
83-
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
87+
vcs_url="https://github.com/abc/def",
8488
)
8589
],
8690
),
@@ -127,3 +131,201 @@ def test_advisory_import_atomicity(dummy_importer):
127131
assert ImpactedPackage.objects.count() == 2
128132
assert PackageCommitPatch.objects.count() == 2
129133
assert PackageV2.objects.count() == 4
134+
135+
136+
@pytest.fixture
137+
def patch_source_samples():
138+
return [
139+
{"url": "https://github.com/abc/def", "commit_hash": None, "patch_text": None}, # PatchData
140+
{
141+
"url": "https://github.com/abc/def",
142+
"commit_hash": None,
143+
"patch_text": "+1-2",
144+
}, # PatchData
145+
{
146+
"url": "https://github.com/abc/def",
147+
"commit_hash": "be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
148+
"patch_text": None,
149+
}, # PackageCommitPatchData
150+
{
151+
"url": "https://github.com/abc/def",
152+
"commit_hash": "be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
153+
"patch_text": "+1-2",
154+
}, # PackageCommitPatchData
155+
{
156+
"url": "https://github.com/abc/def/commit/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
157+
"commit_hash": None,
158+
"patch_text": None,
159+
}, # PackageCommitPatchData
160+
{
161+
"url": "https://github.com/abc/def/commit/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
162+
"commit_hash": None,
163+
"patch_text": "+1-2",
164+
}, # PackageCommitPatchData
165+
{
166+
"url": "https://github.com/abc/def/commit/a2a5b42fb829b4a873c832b805680fc19199a07e",
167+
"commit_hash": "a2a5b42fb829b4a873c832b805680fc19199a07e",
168+
"patch_text": None,
169+
}, # PackageCommitPatchData
170+
{
171+
"url": "https://github.com/abc/def/commit/a2a5b42fb829b4a873c832b805680fc19199a07e",
172+
"commit_hash": "a2a5b42fb829b4a873c832b805680fc19199a07e",
173+
"patch_text": "+1-2",
174+
}, # PackageCommitPatchData
175+
{
176+
"url": "https://unknown.com/abc/def",
177+
"commit_hash": None,
178+
"patch_text": None,
179+
}, # PatchData
180+
{
181+
"url": "https://unknown.com/abc/def",
182+
"commit_hash": None,
183+
"patch_text": "+1-2",
184+
}, # PatchData
185+
{
186+
"url": "https://unknown.com/abc/def",
187+
"commit_hash": "8eb1b04ca4ae6fc0a0ef46f1b0c042f64db28ff9",
188+
"patch_text": None,
189+
}, # ReferenceV2
190+
{
191+
"url": "https://unknown.com/abc/def",
192+
"commit_hash": "8eb1b04ca4ae6fc0a0ef46f1b0c042f64db28ff9",
193+
"patch_text": "+1-2",
194+
}, # ReferenceV2
195+
{
196+
"url": "https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
197+
"commit_hash": None,
198+
"patch_text": None,
199+
}, # PatchData
200+
{
201+
"url": "https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
202+
"commit_hash": None,
203+
"patch_text": "+1-2",
204+
}, # PatchData
205+
{
206+
"url": "https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
207+
"commit_hash": "be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
208+
"patch_text": None,
209+
}, # ReferenceV2
210+
{
211+
"url": "https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
212+
"commit_hash": "be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
213+
"patch_text": "+1-2",
214+
}, # ReferenceV2
215+
]
216+
217+
218+
@pytest.fixture
219+
def dumpy_patch_advisory(patch_source_samples):
220+
references = []
221+
patches = []
222+
affected_packages = []
223+
for entry in patch_source_samples:
224+
url = entry["url"]
225+
commit_hash = entry["commit_hash"]
226+
patch_text = entry["patch_text"]
227+
228+
base_purl, patch_obj = classify_patch_source(
229+
url=url, commit_hash=commit_hash, patch_text=patch_text
230+
)
231+
232+
if isinstance(patch_obj, PackageCommitPatchData):
233+
# For testing only: commit hashes starting with "a" are treated as introduced_by_commit_patches,
234+
# all others are treated as fixed_by_commit_patches.
235+
if patch_obj.commit_hash.startswith("a"):
236+
affected_package = AffectedPackageV2(
237+
package=base_purl,
238+
introduced_by_commit_patches=[patch_obj],
239+
)
240+
else:
241+
affected_package = AffectedPackageV2(
242+
package=base_purl,
243+
fixed_by_commit_patches=[patch_obj],
244+
)
245+
affected_packages.append(affected_package)
246+
elif isinstance(patch_obj, PatchData):
247+
patches.append(patch_obj)
248+
elif isinstance(patch_obj, ReferenceV2):
249+
references.append(patch_obj)
250+
251+
return AdvisoryData(
252+
summary="Test patch advisory",
253+
aliases=["CVE-2025-0001"],
254+
affected_packages=affected_packages,
255+
references_v2=references,
256+
patches=patches,
257+
advisory_id="ADV-1234",
258+
date_published=datetime.now() - timedelta(days=10),
259+
url="https://example.com/advisory/1",
260+
)
261+
262+
263+
@pytest.mark.django_db
264+
def test_patch_advisory(dumpy_patch_advisory):
265+
dumpy_patch_importer = DummyImporter()
266+
dumpy_patch_importer._advisories = [dumpy_patch_advisory]
267+
dumpy_patch_importer.collect_and_store_advisories()
268+
assert AdvisoryV2.objects.count() == 1
269+
adv = AdvisoryV2.objects.get(advisory_id="ADV-1234")
270+
271+
assert ImpactedPackage.objects.count() == 6
272+
assert [
273+
(
274+
package_commit_patch.commit_hash,
275+
package_commit_patch.vcs_url,
276+
package_commit_patch.patch_text,
277+
package_commit_patch.patch_checksum,
278+
)
279+
for package_commit_patch in PackageCommitPatch.objects.all()
280+
] == [
281+
(
282+
"be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
283+
"https://github.com/abc/def",
284+
"+1-2",
285+
"a5d6b89c35224d4ed69910a18fb544ca3fb26f62db53bc2769ce8a8d5cf8874c191186d170cb6e8896b0aaa8eaed891e7e819c4c0c7af499397c84761d6fb22d",
286+
),
287+
(
288+
"a2a5b42fb829b4a873c832b805680fc19199a07e",
289+
"https://github.com/abc/def",
290+
"+1-2",
291+
"a5d6b89c35224d4ed69910a18fb544ca3fb26f62db53bc2769ce8a8d5cf8874c191186d170cb6e8896b0aaa8eaed891e7e819c4c0c7af499397c84761d6fb22d",
292+
),
293+
]
294+
assert (
295+
PackageCommitPatch.objects.count() == 2
296+
) # Only 2 are created because the 6 inputs repeat the same (vcs_url, commit_hash) pairs
297+
assert Patch.objects.count() == 6
298+
assert [
299+
(patch.patch_text, patch.patch_url, patch.patch_checksum) for patch in adv.patches.all()
300+
] == [
301+
(None, "https://github.com/abc/def", None),
302+
(
303+
"+1-2",
304+
"https://github.com/abc/def",
305+
"a5d6b89c35224d4ed69910a18fb544ca3fb26f62db53bc2769ce8a8d5cf8874c191186d170cb6e8896b0aaa8eaed891e7e819c4c0c7af499397c84761d6fb22d",
306+
),
307+
(None, "https://unknown.com/abc/def", None),
308+
(
309+
"+1-2",
310+
"https://unknown.com/abc/def",
311+
"a5d6b89c35224d4ed69910a18fb544ca3fb26f62db53bc2769ce8a8d5cf8874c191186d170cb6e8896b0aaa8eaed891e7e819c4c0c7af499397c84761d6fb22d",
312+
),
313+
(None, "https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d", None),
314+
(
315+
"+1-2",
316+
"https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
317+
"a5d6b89c35224d4ed69910a18fb544ca3fb26f62db53bc2769ce8a8d5cf8874c191186d170cb6e8896b0aaa8eaed891e7e819c4c0c7af499397c84761d6fb22d",
318+
),
319+
]
320+
321+
assert (
322+
AdvisoryReference.objects.count() == 2
323+
) # Only 2 are created because the 4 ReferenceV2 inputs share just 2 distinct URLs.
324+
assert [(ref.url, ref.reference_type, ref.reference_id) for ref in adv.references.all()] == [
325+
("https://unknown.com/abc/def", "commit", "8eb1b04ca4ae6fc0a0ef46f1b0c042f64db28ff9"),
326+
(
327+
"https://unknown.com/abc/def/be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
328+
"commit",
329+
"be891173be2fbdc897116bf5aa4fc9fdc8dc4f3d",
330+
),
331+
]

0 commit comments

Comments
 (0)