diff --git a/README.md b/README.md index 7507fe6..81b1e03 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,11 @@ Flags: -n, --name string Name for the SBOM document (default "sbomit-sbom") -o, --output string Output file path (default: stdout) --project-dir string Project directory to scan with the cataloger (default: current directory) - --types strings Attestation types to parse (comma-separated). (default [material,command-run,product,network-trace]) + --types strings Attestation types to parse (comma-separated). (default [material,command-run,product,network-trace,maven]) -v, --version string Version for the SBOM document (default "0.0.1") ``` -By default, `sbomit` parses `material`, `command-run`, and `product` attestations. To restrict parsing on demand: +By default, `sbomit` parses `material`, `command-run`, `product`, `network-trace`, and `maven` attestations. To restrict parsing on demand: ```bash sbomit generate attestation.json --types command-run @@ -55,9 +55,11 @@ sbomit generate attestation.json --catalog syft --project-dir /path/to/project ### Attestation Extractors Modular extractors for different attestation types: -- `MaterialExtractor` - Build Input materials +- `MaterialExtractor` - Build input materials - `CommandRunExtractor` - Opened files from processes - `ProductExtractor` - Built artifacts +- `NetworkTrace` - External download connections +- `MavenExtractor` - Structured Java dependencies Implement `Extractor` interface to add new types. @@ -68,14 +70,16 @@ Ecosystem-specific package resolvers that extract packages from file paths: - `GoResolver` - Resolves from module cache paths under `pkg/mod/` - `RustResolver` - Resolves from Cargo registry paths - `JavaScriptResolver` - Resolves pnpm-style paths under `node_modules/.pnpm/` +- `JavaResolver` - Resolves from Maven local repo (`.m2`) and Gradle caches Each resolver implements `Resolver` and optionally `PackageFileFilterer` to filter its own package files. 
### Processing Pipeline ``` -Attestation → Extract Files → Filter Cache Files → -Run Resolvers → Filter Package Files → Generate SBOM +Attestation → Extract Files & Network Conns → Run Resolvers → +Filter Package Files → Resolve Network PURLs → Merge with Catalog (Syft) → +Generate SBOM Document ``` ## Testing diff --git a/cmd/generate.go b/cmd/generate.go index c4af136..e074f05 100644 --- a/cmd/generate.go +++ b/cmd/generate.go @@ -25,10 +25,9 @@ var generateCmd = &cobra.Command{ Short: "Generate an SBOM from witness attestations", Long: `Generate a Software Bill of Materials (SBOM) from witness attestation files. -This command parses witness attestations, extracts file and network information -from material, command-run, product, and network-trace attestations, resolves -files and network connections to packages by ecosystem, and outputs an SBOM in -the specified format. +This command parses witness attestations, extracts file and network information from material, command-run, product, +network-trace, and maven attestations, resolves files and network connections +to packages by ecosystem, and outputs an SBOM in the specified format. 
Supported output formats: - spdx23 (default): SPDX 2.3 JSON format @@ -56,7 +55,7 @@ func init() { generateCmd.Flags().StringVarP(&documentName, "name", "n", "sbomit-sbom", "Name for the SBOM document") generateCmd.Flags().StringVarP(&documentVersion, "version", "v", "0.0.1", "Version for the SBOM document") generateCmd.Flags().StringSliceVar(&authors, "author", []string{}, "Document authors (can be specified multiple times)") - generateCmd.Flags().StringSliceVar(&attestationTypes, "types", []string{"material", "command-run", "product", "network-trace"}, "Attestation types to parse (comma-separated).") + generateCmd.Flags().StringSliceVar(&attestationTypes, "types", []string{"material", "command-run", "product", "network-trace", "maven"}, "Attestation types to parse (comma-separated).") generateCmd.Flags().StringVarP(&catalog, "catalog", "c", "", "Cataloger to run before processing attestations (supported: syft)") generateCmd.Flags().StringVar(&projectDir, "project-dir", "", "Project directory to scan with the cataloger (default: current directory)") } diff --git a/pkg/attestation/extractor.go b/pkg/attestation/extractor.go index dfe8824..d5e780d 100644 --- a/pkg/attestation/extractor.go +++ b/pkg/attestation/extractor.go @@ -24,6 +24,7 @@ func NewExtractorChain() *ExtractorChain { chain.RegisterExtractor(NewMaterialExtractor()) chain.RegisterExtractor(NewCommandRunExtractor()) chain.RegisterExtractor(NewProductExtractor()) + chain.RegisterExtractor(NewMavenExtractor()) return chain } diff --git a/pkg/attestation/maven_extractor.go b/pkg/attestation/maven_extractor.go new file mode 100644 index 0000000..6bc63ca --- /dev/null +++ b/pkg/attestation/maven_extractor.go @@ -0,0 +1,75 @@ +package attestation + +import ( + "fmt" +) + +// MavenExtractor extracts Maven dependencies from witness maven attestations +type MavenExtractor struct{} + +func NewMavenExtractor() *MavenExtractor { + return &MavenExtractor{} +} + +func (e *MavenExtractor) Name() string { + return 
"maven"
+}
+
+// Extract maps every entry of data["maven"]["dependencies"] to a FileInfo whose
+// virtual .m2 path lets JavaResolver recover the coordinates. Entries missing
+// groupId, artifactId or version are skipped; a malformed section yields nil.
+func (e *MavenExtractor) Extract(data map[string]interface{}) []FileInfo {
+	var files []FileInfo
+
+	maven, ok := data["maven"].(map[string]interface{})
+	if !ok {
+		return nil
+	}
+
+	deps, ok := maven["dependencies"].([]interface{})
+	if !ok {
+		return nil
+	}
+
+	for _, d := range deps {
+		dep, ok := d.(map[string]interface{})
+		if !ok {
+			continue
+		}
+
+		groupID, _ := dep["groupId"].(string)
+		artifactID, _ := dep["artifactId"].(string)
+		version, _ := dep["version"].(string)
+
+		if groupID == "" || artifactID == "" || version == "" {
+			continue
+		}
+
+		// Group becomes nested directories (org.slf4j -> org/slf4j); swapping the ASCII '.' bytes is UTF-8 safe.
+		groupPath := []byte(groupID)
+		for i, c := range groupPath {
+			if c == '.' {
+				groupPath[i] = '/'
+			}
+		}
+
+		virtualPath := fmt.Sprintf("/virtual/.m2/repository/%s/%s/%s/%s-%s.jar",
+			groupPath, artifactID, version, artifactID, version)
+
+		hashes := make(map[string]string)
+		if h, ok := dep["hashes"].(map[string]interface{}); ok {
+			for k, v := range h {
+				if hashVal, ok := v.(string); ok {
+					hashes[k] = hashVal
+				}
+			}
+		}
+
+		files = append(files, FileInfo{
+			Path:   virtualPath,
+			Hashes: hashes,
+		})
+	}
+
+	return files
+}
diff --git a/pkg/attestation/maven_extractor_test.go b/pkg/attestation/maven_extractor_test.go
new file mode 100644
index 0000000..e0cbc3b
--- /dev/null
+++ b/pkg/attestation/maven_extractor_test.go
@@ -0,0 +1,38 @@
+package attestation
+
+import (
+	"testing"
+)
+
+func TestMavenExtractor_Extract(t *testing.T) {
+	e := NewMavenExtractor()
+	data := map[string]interface{}{
+		"maven": map[string]interface{}{
+			"dependencies": []interface{}{
+				map[string]interface{}{
+					"groupId":    "org.slf4j",
+					"artifactId": "slf4j-api",
+					"version":    "1.7.32",
+					"hashes": map[string]interface{}{
+						"sha1": "abc",
+					},
+				},
+			},
+		},
+	}
+
+	files := e.Extract(data)
+	if len(files) != 1 {
+		t.Fatalf("Extract() got %v
files, want 1", len(files)) + } + + f := files[0] + expectedPath := "/virtual/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.jar" + if f.Path != expectedPath { + t.Errorf("Path = %v, want %v", f.Path, expectedPath) + } + + if f.Hashes["sha1"] != "abc" { + t.Errorf("Hash[sha1] = %v, want abc", f.Hashes["sha1"]) + } +} diff --git a/pkg/generator/generator.go b/pkg/generator/generator.go index 00b6add..d8b5f02 100644 --- a/pkg/generator/generator.go +++ b/pkg/generator/generator.go @@ -37,7 +37,7 @@ func DefaultOptions() *Options { DocumentName: "sbomit-generated-sbom", DocumentVersion: "0.0.1", Authors: []string{}, - AttestationTypes: []string{"material", "command-run", "product", "network-trace"}, + AttestationTypes: []string{"material", "command-run", "product", "network-trace", "maven"}, OutputFormat: "spdx23", Catalog: "", ProjectDir: "", diff --git a/pkg/resolver/java.go b/pkg/resolver/java.go new file mode 100644 index 0000000..d8447e6 --- /dev/null +++ b/pkg/resolver/java.go @@ -0,0 +1,129 @@ +package resolver + +import ( + "path" + "regexp" + "strings" +) + +type JavaResolver struct { + mavenRe *regexp.Regexp + gradleRe *regexp.Regexp +} + +func NewJavaResolver() *JavaResolver { + return &JavaResolver{ + // Matches: .../.m2/repository/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar + // Group 1: path to group (com/google/guava) + // Group 2: artifactId (guava) + // Group 3: version (31.1-jre) + // Group 4: filename (guava-31.1-jre) + // Group 5: extension (jar|pom|war) + mavenRe: regexp.MustCompile(`\.m2/repository/(.+)/([^/]+)/([^/]+)/([^/]+)\.(jar|pom|war)$`), + + // Matches: .../.gradle/caches/modules-2/files-2.1/com.google.guava/guava/31.1-jre/hash/guava-31.1-jre.jar + // Group 1: groupId (com.google.guava) + // Group 2: artifactId (guava) + // Group 3: version (31.1-jre) + // Group 4: filename (guava-31.1-jre) + // Group 5: extension (jar|pom|war) + gradleRe: 
regexp.MustCompile(`\.gradle/caches/modules-2/files-2\.1/([^/]+)/([^/]+)/([^/]+)/[^/]+/([^/]+)\.(jar|pom|war)$`),
+	}
+}
+
+func (r *JavaResolver) Name() string {
+	return "java"
+}
+
+// Resolve turns Maven local-repository and Gradle cache paths into maven
+// packages and returns all other files. Hashes are copied into package-owned
+// maps, so merging several files of one artifact never mutates the input.
+func (r *JavaResolver) Resolve(files []FileInfo) (packages []PackageInfo, remainingFiles []FileInfo) {
+	seen := make(map[string]int) // purl -> index in packages
+
+	for _, f := range files {
+		p := path.Clean(f.Path)
+		var groupID, artifactID, version string
+
+		// Submatches: [1] group, [2] artifactId, [3] version, [4] file name, [5] extension.
+		if m := r.mavenRe.FindStringSubmatch(p); len(m) == 6 {
+			if m[4] == m[2]+"-"+m[3] {
+				groupID, artifactID, version = strings.ReplaceAll(m[1], "/", "."), m[2], m[3]
+			}
+		} else if m := r.gradleRe.FindStringSubmatch(p); len(m) == 6 {
+			if m[4] == m[2]+"-"+m[3] {
+				groupID, artifactID, version = m[1], m[2], m[3]
+			}
+		}
+
+		if groupID == "" || artifactID == "" || version == "" {
+			remainingFiles = append(remainingFiles, f)
+			continue
+		}
+
+		purl := "pkg:maven/" + groupID + "/" + artifactID + "@" + version
+		idx, ok := seen[purl]
+		if !ok {
+			idx = len(packages)
+			seen[purl] = idx
+			packages = append(packages, PackageInfo{
+				Name:      groupID + ":" + artifactID,
+				Version:   version,
+				Ecosystem: "maven",
+				PURL:      purl,
+				FoundBy:   "attestation:java",
+			})
+		}
+
+		// Merge into a map owned by the package (e.g. the .pom and .jar of one
+		// artifact); storing f.Hashes itself would let later merges write into it.
+		if packages[idx].Hashes == nil && len(f.Hashes) > 0 {
+			packages[idx].Hashes = make(map[string]string, len(f.Hashes))
+		}
+		for k, v := range f.Hashes {
+			packages[idx].Hashes[k] = v
+		}
+	}
+
+	return packages, remainingFiles
+}
+
+func (r *JavaResolver) CreateFileFilters(packages []PackageInfo) []PackageFileFilter {
+	var filters []PackageFileFilter
+	for _, pkg := range packages {
+		if pkg.Ecosystem == "maven" {
+			parts := strings.SplitN(pkg.Name, ":", 2)
+			if len(parts) == 2 {
+				filters = append(filters, &javaPackageFilter{
+					groupId:    parts[0],
+
artifactId: parts[1],
+					version:    pkg.Version,
+				})
+			}
+		}
+	}
+	return filters
+}
+
+type javaPackageFilter struct {
+	groupId    string
+	artifactId string
+	version    string
+}
+
+// Matches reports whether p is inside this package's Maven local-repository or
+// Gradle module-cache directory. Both sides are lower-cased so coordinates with
+// upper-case characters (such as "1.0-SNAPSHOT") are still recognised.
+func (f *javaPackageFilter) Matches(p string) bool {
+	p = strings.ToLower(p)
+	group := strings.ToLower(f.groupId)
+	tail := "/" + strings.ToLower(f.artifactId) + "/" + strings.ToLower(f.version) + "/"
+
+	// Maven nests the group as directories (org.slf4j -> org/slf4j).
+	if strings.Contains(p, "/.m2/repository/"+strings.ReplaceAll(group, ".", "/")+tail) {
+		return true
+	}
+
+	// Gradle keeps the dotted group as a single directory name.
+	return strings.Contains(p, "/.gradle/caches/modules-2/files-2.1/"+group+tail)
+}
diff --git a/pkg/resolver/java_test.go b/pkg/resolver/java_test.go
new file mode 100644
index 0000000..6e97f2c
--- /dev/null
+++ b/pkg/resolver/java_test.go
@@ -0,0 +1,181 @@
+package resolver
+
+import (
+	"testing"
+)
+
+func TestJavaResolver_Resolve(t *testing.T) {
+	tests := []struct {
+		name          string
+		files         []FileInfo
+		wantPackages  []PackageInfo
+		wantRemaining int
+	}{
+		{
+			name: "Maven local path - JAR",
+			files: []FileInfo{
+				{
+					Path:   "/home/user/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.jar",
+					Hashes: map[string]string{"sha1": "abc"},
+				},
+			},
+			wantPackages: []PackageInfo{
+				{
+					Name:      "org.slf4j:slf4j-api",
+					Version:   "1.7.32",
+					Ecosystem: "maven",
+					PURL:      "pkg:maven/org.slf4j/slf4j-api@1.7.32",
+				},
+			},
+			wantRemaining: 0,
+		},
+		{
+			name: "Maven local path - POM",
+			files: []FileInfo{
+				{
+					Path:   "/home/user/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.pom",
+					Hashes: map[string]string{"sha1": "abc"},
+				},
+			},
+			wantPackages: []PackageInfo{
+				{
+					Name:      "org.slf4j:slf4j-api",
+					Version:   "1.7.32",
+					Ecosystem: "maven",
+					PURL:      "pkg:maven/org.slf4j/slf4j-api@1.7.32",
+				},
+			},
+			wantRemaining: 0,
+		},
+		{
+			name: "Gradle cache path",
+			files: []FileInfo{
+				{
+					Path:   "/home/user/.gradle/caches/modules-2/files-2.1/com.google.guava/guava/31.1-jre/12345/guava-31.1-jre.jar",
+					Hashes: map[string]string{"sha256": "def"},
+				},
+ }, + wantPackages: []PackageInfo{ + { + Name: "com.google.guava:guava", + Version: "31.1-jre", + Ecosystem: "maven", + PURL: "pkg:maven/com.google.guava/guava@31.1-jre", + }, + }, + wantRemaining: 0, + }, + { + name: "Deduplication and Hash merging", + files: []FileInfo{ + { + Path: "/home/user/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.jar", + Hashes: map[string]string{"sha256": "jar-hash"}, + }, + { + Path: "/home/user/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.pom", + Hashes: map[string]string{"sha1": "pom-hash"}, + }, + }, + wantPackages: []PackageInfo{ + { + Name: "org.slf4j:slf4j-api", + Version: "1.7.32", + Ecosystem: "maven", + PURL: "pkg:maven/org.slf4j/slf4j-api@1.7.32", + Hashes: map[string]string{"sha256": "jar-hash", "sha1": "pom-hash"}, + }, + }, + wantRemaining: 0, + }, + { + name: "Unrelated file", + files: []FileInfo{ + {Path: "/home/user/project/README.md"}, + }, + wantPackages: nil, + wantRemaining: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := NewJavaResolver() + gotPackages, gotRemaining := r.Resolve(tt.files) + + if len(gotPackages) != len(tt.wantPackages) { + t.Fatalf("Resolve() got %v packages, want %v", len(gotPackages), len(tt.wantPackages)) + } + + for i, wp := range tt.wantPackages { + gp := gotPackages[i] + if gp.PURL != wp.PURL { + t.Errorf("Package[%d] PURL = %v, want %v", i, gp.PURL, wp.PURL) + } + if gp.Name != wp.Name { + t.Errorf("Package[%d] Name = %v, want %v", i, gp.Name, wp.Name) + } + if wp.Hashes != nil { + for k, v := range wp.Hashes { + if gp.Hashes[k] != v { + t.Errorf("Package[%d] Hash[%s] = %v, want %v", i, k, gp.Hashes[k], v) + } + } + } + } + + if len(gotRemaining) != tt.wantRemaining { + t.Errorf("Resolve() got %v remaining files, want %v", len(gotRemaining), tt.wantRemaining) + } + }) + } +} + +func TestJavaPackageFilter_Matches(t *testing.T) { + tests := []struct { + name string + groupId string + artifactId string + version string + path 
string + want bool + }{ + { + name: "Match Maven path", + groupId: "org.slf4j", + artifactId: "slf4j-api", + version: "1.7.32", + path: "/home/user/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.jar", + want: true, + }, + { + name: "Match Gradle path", + groupId: "com.google.guava", + artifactId: "guava", + version: "31.1-jre", + path: "/root/.gradle/caches/modules-2/files-2.1/com.google.guava/guava/31.1-jre/abc/guava-31.1-jre.jar", + want: true, + }, + { + name: "Mismatch version", + groupId: "org.slf4j", + artifactId: "slf4j-api", + version: "1.7.32", + path: "/home/user/.m2/repository/org/slf4j/slf4j-api/1.7.33/slf4j-api-1.7.33.jar", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := &javaPackageFilter{ + groupId: tt.groupId, + artifactId: tt.artifactId, + version: tt.version, + } + if got := f.Matches(tt.path); got != tt.want { + t.Errorf("Matches() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/resolver/resolver.go b/pkg/resolver/resolver.go index c59cf57..b9943c3 100644 --- a/pkg/resolver/resolver.go +++ b/pkg/resolver/resolver.go @@ -42,10 +42,10 @@ func NewResolverChain() *ResolverChain { return &ResolverChain{ resolvers: []Resolver{ NewPythonResolver(), - // Add more resolvers here as they are implemented: NewGoResolver(), NewRustResolver(), NewJavaScriptResolver(), + NewJavaResolver(), }, filter: NewFileFilter(), } diff --git a/pkg/resolver/rust_test.go b/pkg/resolver/rust_test.go new file mode 100644 index 0000000..03580e0 --- /dev/null +++ b/pkg/resolver/rust_test.go @@ -0,0 +1,168 @@ +package resolver + +import ( + "testing" +) + +func TestRustResolver_Resolve(t *testing.T) { + tests := []struct { + name string + files []FileInfo + wantPackages []PackageInfo + wantRemaining int + }{ + { + name: "Valid Cargo registry path", + files: []FileInfo{ + {Path: "/home/user/.cargo/registry/src/github.com-1ecc6299db9ec823/serde-1.0.130/src/lib.rs"}, + }, + wantPackages: []PackageInfo{ + 
{ + Name: "serde", + Version: "1.0.130", + Ecosystem: "cargo", + PURL: "pkg:cargo/serde@1.0.130", + }, + }, + wantRemaining: 1, + }, + { + name: "Cargo cache crate file", + files: []FileInfo{ + {Path: "/home/user/.cargo/registry/cache/github.com-1ecc6299db9ec823/tokio-1.28.2.crate"}, + }, + wantPackages: []PackageInfo{ + { + Name: "tokio", + Version: "1.28.2", + Ecosystem: "cargo", + PURL: "pkg:cargo/tokio@1.28.2", + }, + }, + wantRemaining: 1, + }, + { + name: "Multiple crates and ignored files", + files: []FileInfo{ + {Path: "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/regex-1.8.4/src/lib.rs"}, + {Path: "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/regex-1.8.4/target/debug/libregex.rlib"}, + {Path: "/home/user/.cargo/registry/src/index.crates.io-6f17d22bba15001f/libc-0.2.147/src/lib.rs"}, + }, + wantPackages: []PackageInfo{ + {Name: "regex", Version: "1.8.4", Ecosystem: "cargo", PURL: "pkg:cargo/regex@1.8.4"}, + {Name: "libc", Version: "0.2.147", Ecosystem: "cargo", PURL: "pkg:cargo/libc@0.2.147"}, + }, + wantRemaining: 2, + }, + { + name: "Path with version-like string in directory name but not crate", + files: []FileInfo{ + {Path: "/home/user/not-a-registry/some-package-1.2.3/src/main.rs"}, + }, + wantPackages: nil, + wantRemaining: 1, + }, + { + name: "Malformed version string", + files: []FileInfo{ + {Path: "/usr/local/cargo/registry/src/unrecognized/invalid-pkg/src/lib.rs"}, + }, + wantPackages: nil, + wantRemaining: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := NewRustResolver() + gotPackages, gotRemaining := r.Resolve(tt.files) + + if len(gotPackages) != len(tt.wantPackages) { + t.Errorf("Resolve() gotPackages length = %v, want %v", len(gotPackages), len(tt.wantPackages)) + return + } + + for _, wp := range tt.wantPackages { + found := false + for _, gp := range gotPackages { + if gp.Name == wp.Name && gp.Version == wp.Version && gp.PURL == wp.PURL { + found = true + break + } 
+ } + if !found { + t.Errorf("Resolve() did not find expected package %+v", wp) + } + } + + if len(gotRemaining) != tt.wantRemaining { + t.Errorf("Resolve() gotRemaining length = %v, want %v", len(gotRemaining), tt.wantRemaining) + } + }) + } +} + +func TestRustPackageFilter_Matches(t *testing.T) { + tests := []struct { + name string + packageName string + version string + path string + want bool + }{ + { + name: "Match registry src path", + packageName: "serde", + version: "1.0.130", + path: "/home/user/.cargo/registry/src/github.com-1ecc6299db9ec823/serde-1.0.130/src/lib.rs", + want: true, + }, + { + name: "Match registry cache crate", + packageName: "tokio", + version: "1.28.2", + path: "/home/user/.cargo/registry/cache/github.com-1ecc6299db9ec823/tokio-1.28.2.crate", + want: true, + }, + { + name: "Match from crates directory", + packageName: "anyhow", + version: "1.0.71", + path: "/usr/local/cargo/crates/anyhow-1.0.71/src/lib.rs", + want: true, + }, + { + name: "Mismatch version", + packageName: "serde", + version: "1.0.130", + path: "/home/user/.cargo/registry/src/github.com-1ecc6299db9ec823/serde-1.0.131/src/lib.rs", + want: false, + }, + { + name: "Mismatch package name", + packageName: "serde", + version: "1.0.130", + path: "/home/user/.cargo/registry/src/github.com-1ecc6299db9ec823/serde_json-1.0.130/src/lib.rs", + want: false, + }, + { + name: "Non-registry path", + packageName: "serde", + version: "1.0.130", + path: "/home/user/not-a-registry/serde-1.0.130/src/lib.rs", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := &rustPackageFilter{ + packageName: tt.packageName, + version: tt.version, + } + if got := f.Matches(tt.path); got != tt.want { + t.Errorf("Matches() = %v, want %v", got, tt.want) + } + }) + } +}