-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDC2CSV.py
More file actions
44 lines (33 loc) · 2.09 KB
/
DC2CSV.py
File metadata and controls
44 lines (33 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# AI UTILIZED IN THE MAKING OF THIS SOFTWARE. AI UTILIZED WAS CHATGPT-4o
# DC2CSV.py
# Created by:
# Garrett Greathouse
# Erin Tallman
import pandas as pd
# Input and output file names. TODO: add user input for input file name and path
# The input NEEDS to be the CSV report written directly by OWASP Dependency-Check
# (add "-f 'CSV'" to your dependency-check command to produce it).
input_file = "dc_out.csv"
output_file = "final.csv"

# Columns removed from the final report (this is what we didn't need; your needs may differ).
# Helper columns you create yourself can be listed here too, as long as they are
# created and used before the drop step in clean_report().
# Names that are not present in the data are skipped silently.
columns_to_remove = ["CVSSv2", "CVSSv3", "Identifiers", "CPE", "DuplicateCheck", "Project", "ScanDate", "DependencyPath", "Description", "License", "Md5", "Sha1", "CPE Confidence", "VendorProject", "Product", "Name", "DateAdded", "ShortDescription"]

# Columns the cleaning step reads directly; the input must contain all of them.
REQUIRED_COLUMNS = ("Identifiers", "CVE", "CVSSv3_BaseScore", "CVSSv2_Score")


def clean_report(df):
    """Return a de-duplicated, trimmed and score-sorted copy of a report frame.

    Steps, in order:
      1. Round both CVSS score columns to one decimal place.
      2. Build "CVSS Score": the v3 base score, falling back to the v2 score
         on rows where the v3 score is missing.
      3. Keep only the first row for every (Identifiers, CVE) pair.
      4. Drop rows whose CVE value contains "GHSA".
      5. Remove every column listed in ``columns_to_remove``.
      6. Sort by "CVSS Score", highest first (rows without a score go last).

    Args:
        df: DataFrame holding the report rows. It is not modified.

    Returns:
        A new DataFrame with the cleaned rows.

    Raises:
        KeyError: if any column in ``REQUIRED_COLUMNS`` is absent.
    """
    missing = [name for name in REQUIRED_COLUMNS if name not in df.columns]
    if missing:
        raise KeyError(
            f"Input CSV is missing required column(s): {', '.join(missing)}"
        )

    # Work on a copy so the caller's frame is left untouched.
    cleaned = df.copy()

    # Clean up scores to avoid long doubles, then pick one score per row:
    # v3 when available, otherwise v2 (this is a fallback, not an average).
    cleaned["CVSSv3_BaseScore"] = cleaned["CVSSv3_BaseScore"].round(1)
    cleaned["CVSSv2_Score"] = cleaned["CVSSv2_Score"].round(1)
    cleaned["CVSS Score"] = cleaned["CVSSv3_BaseScore"].combine_first(
        cleaned["CVSSv2_Score"]
    )

    # Remove duplicates (e.g. several pom.xml files referencing the same
    # dependency). Comparing the two columns as a pair, instead of one
    # concatenated string, keeps rows with a missing identifier from all
    # collapsing into a single row and rules out accidental key collisions
    # such as "ab" + "c" == "a" + "bc".
    cleaned = cleaned.drop_duplicates(subset=["Identifiers", "CVE"], keep="first")

    # Drop GHSA advisories; na=False keeps rows whose CVE value is missing.
    cleaned = cleaned[~cleaned["CVE"].str.contains("GHSA", na=False)]

    # Remove unneeded columns (MAKE SURE ANY NEWLY CREATED COLUMNS LISTED IN
    # columns_to_remove HAVE BEEN PROCESSED BEFORE THIS POINT).
    # errors="ignore" avoids a crash when a listed column does not exist.
    cleaned = cleaned.drop(columns=columns_to_remove, errors="ignore")

    # Highest score first.
    return cleaned.sort_values(by=["CVSS Score"], ascending=False)


def main():
    """Read ``input_file``, clean it and write the result to ``output_file``."""
    report = pd.read_csv(input_file)
    cleaned = clean_report(report)
    # Output as CSV with the name listed in output_file
    cleaned.to_csv(output_file, index=False)
    # Notifies user of completion w/ file name
    print(f"Cleaned CSV saved as: {output_file}")


if __name__ == "__main__":
    main()