Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ Flags:
-n, --name string Name for the SBOM document (default "sbomit-sbom")
-o, --output string Output file path (default: stdout)
--project-dir string Project directory to scan with the cataloger (default: current directory)
--types strings Attestation types to parse (comma-separated). (default [material,command-run,product,network-trace])
--types strings Attestation types to parse (comma-separated). (default [material,command-run,product,network-trace,maven])
-v, --version string Version for the SBOM document (default "0.0.1")
```

By default, `sbomit` parses `material`, `command-run`, and `product` attestations. To restrict parsing on demand:
By default, `sbomit` parses `material`, `command-run`, `product`, `network-trace`, and `maven` attestations. To restrict parsing on demand:

```bash
sbomit generate attestation.json --types command-run
Expand All @@ -55,9 +55,11 @@ sbomit generate attestation.json --catalog syft --project-dir /path/to/project
### Attestation Extractors

Modular extractors for different attestation types:
- `MaterialExtractor` - Build Input materials
- `MaterialExtractor` - Build input materials
- `CommandRunExtractor` - Opened files from processes
- `ProductExtractor` - Built artifacts
- `NetworkTrace` - External download connections
- `MavenExtractor` - Structured Java dependencies

Implement the `Extractor` interface to add new types.

Expand All @@ -68,14 +70,16 @@ Ecosystem-specific package resolvers that extract packages from file paths:
- `GoResolver` - Resolves from module cache paths under `pkg/mod/`
- `RustResolver` - Resolves from Cargo registry paths
- `JavaScriptResolver` - Resolves pnpm-style paths under `node_modules/.pnpm/`
- `JavaResolver` - Resolves from Maven local repo (`.m2`) and Gradle caches

Each resolver implements `Resolver` and optionally `PackageFileFilterer` to filter its own package files.

### Processing Pipeline

```
Attestation → Extract Files → Filter Cache Files →
Run Resolvers → Filter Package Files → Generate SBOM
Attestation → Extract Files & Network Conns → Run Resolvers →
Filter Package Files → Resolve Network PURLs → Merge with Catalog (Syft) →
Generate SBOM Document
```

## Testing
Expand Down
9 changes: 4 additions & 5 deletions cmd/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ var generateCmd = &cobra.Command{
Short: "Generate an SBOM from witness attestations",
Long: `Generate a Software Bill of Materials (SBOM) from witness attestation files.

This command parses witness attestations, extracts file and network information
from material, command-run, product, and network-trace attestations, resolves
files and network connections to packages by ecosystem, and outputs an SBOM in
the specified format.
This command parses witness attestations, extracts file and network information from material, command-run, product,
network-trace, and maven attestations, resolves files and network connections
to packages by ecosystem, and outputs an SBOM in the specified format.

Supported output formats:
- spdx23 (default): SPDX 2.3 JSON format
Expand Down Expand Up @@ -56,7 +55,7 @@ func init() {
generateCmd.Flags().StringVarP(&documentName, "name", "n", "sbomit-sbom", "Name for the SBOM document")
generateCmd.Flags().StringVarP(&documentVersion, "version", "v", "0.0.1", "Version for the SBOM document")
generateCmd.Flags().StringSliceVar(&authors, "author", []string{}, "Document authors (can be specified multiple times)")
generateCmd.Flags().StringSliceVar(&attestationTypes, "types", []string{"material", "command-run", "product", "network-trace"}, "Attestation types to parse (comma-separated).")
generateCmd.Flags().StringSliceVar(&attestationTypes, "types", []string{"material", "command-run", "product", "network-trace", "maven"}, "Attestation types to parse (comma-separated).")
generateCmd.Flags().StringVarP(&catalog, "catalog", "c", "", "Cataloger to run before processing attestations (supported: syft)")
generateCmd.Flags().StringVar(&projectDir, "project-dir", "", "Project directory to scan with the cataloger (default: current directory)")
}
Expand Down
1 change: 1 addition & 0 deletions pkg/attestation/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ func NewExtractorChain() *ExtractorChain {
chain.RegisterExtractor(NewMaterialExtractor())
chain.RegisterExtractor(NewCommandRunExtractor())
chain.RegisterExtractor(NewProductExtractor())
chain.RegisterExtractor(NewMavenExtractor())

return chain
}
Expand Down
75 changes: 75 additions & 0 deletions pkg/attestation/maven_extractor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package attestation

import (
	"fmt"
	"strings"
)

// MavenExtractor extracts Maven dependencies from witness maven attestations.
type MavenExtractor struct{}

// NewMavenExtractor returns a ready-to-use MavenExtractor.
func NewMavenExtractor() *MavenExtractor {
	return &MavenExtractor{}
}

// Name returns the attestation type handled by this extractor ("maven").
func (e *MavenExtractor) Name() string {
	return "maven"
}

// Extract reads data["maven"]["dependencies"] and returns one FileInfo per
// dependency that carries a non-empty string groupId, artifactId and version.
//
// Each dependency is represented by a virtual path shaped like a local Maven
// repository entry:
//
//	/virtual/.m2/repository/<group/as/path>/<artifactId>/<version>/<artifactId>-<version>.jar
//
// String-valued entries of the dependency's "hashes" object are copied into
// FileInfo.Hashes; entries of any other type are ignored.
//
// Extract returns nil when the "maven" object or its "dependencies" array is
// missing or has an unexpected type, and when no dependency is usable.
func (e *MavenExtractor) Extract(data map[string]interface{}) []FileInfo {
	maven, ok := data["maven"].(map[string]interface{})
	if !ok {
		return nil
	}

	deps, ok := maven["dependencies"].([]interface{})
	if !ok {
		return nil
	}

	var files []FileInfo
	for _, d := range deps {
		dep, ok := d.(map[string]interface{})
		if !ok {
			continue
		}

		// A failed type assertion leaves the zero value "", which the
		// completeness check below rejects.
		groupID, _ := dep["groupId"].(string)
		artifactID, _ := dep["artifactId"].(string)
		version, _ := dep["version"].(string)
		if groupID == "" || artifactID == "" || version == "" {
			continue
		}

		// Convert back to a structured path so JavaResolver can process it.
		// The path is virtual and only used for coordinate extraction.
		groupPath := strings.ReplaceAll(groupID, ".", "/")
		virtualPath := fmt.Sprintf("/virtual/.m2/repository/%s/%s/%s/%s-%s.jar",
			groupPath, artifactID, version, artifactID, version)

		hashes := make(map[string]string)
		if raw, ok := dep["hashes"].(map[string]interface{}); ok {
			for algo, v := range raw {
				if digest, ok := v.(string); ok {
					hashes[algo] = digest
				}
			}
		}

		files = append(files, FileInfo{
			Path:   virtualPath,
			Hashes: hashes,
		})
	}

	return files
}
38 changes: 38 additions & 0 deletions pkg/attestation/maven_extractor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package attestation

import (
"testing"
)

// TestMavenExtractor_Extract checks that one complete dependency is turned
// into a single FileInfo whose virtual path follows the .m2 repository layout
// and whose hashes are carried over.
func TestMavenExtractor_Extract(t *testing.T) {
	slf4j := map[string]interface{}{
		"groupId":    "org.slf4j",
		"artifactId": "slf4j-api",
		"version":    "1.7.32",
		"hashes": map[string]interface{}{
			"sha1": "abc",
		},
	}
	payload := map[string]interface{}{
		"maven": map[string]interface{}{
			"dependencies": []interface{}{slf4j},
		},
	}

	got := NewMavenExtractor().Extract(payload)
	if n := len(got); n != 1 {
		t.Fatalf("Extract() got %v files, want 1", n)
	}

	const wantPath = "/virtual/.m2/repository/org/slf4j/slf4j-api/1.7.32/slf4j-api-1.7.32.jar"
	first := got[0]
	if first.Path != wantPath {
		t.Errorf("Path = %v, want %v", first.Path, wantPath)
	}

	if sha1 := first.Hashes["sha1"]; sha1 != "abc" {
		t.Errorf("Hash[sha1] = %v, want abc", sha1)
	}
}
2 changes: 1 addition & 1 deletion pkg/generator/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func DefaultOptions() *Options {
DocumentName: "sbomit-generated-sbom",
DocumentVersion: "0.0.1",
Authors: []string{},
AttestationTypes: []string{"material", "command-run", "product", "network-trace"},
AttestationTypes: []string{"material", "command-run", "product", "network-trace", "maven"},
OutputFormat: "spdx23",
Catalog: "",
ProjectDir: "",
Expand Down
129 changes: 129 additions & 0 deletions pkg/resolver/java.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package resolver

import (
"path"
"regexp"
"strings"
)

// JavaResolver resolves Maven coordinates from file paths that point into a
// local Maven repository (.m2) or into the Gradle module cache.
type JavaResolver struct {
	mavenRe  *regexp.Regexp
	gradleRe *regexp.Regexp
}

// NewJavaResolver returns a JavaResolver with its path patterns compiled.
func NewJavaResolver() *JavaResolver {
	return &JavaResolver{
		// Matches: .../.m2/repository/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar
		// Group 1: path to group (com/google/guava)
		// Group 2: artifactId (guava)
		// Group 3: version (31.1-jre)
		// Group 4: filename (guava-31.1-jre)
		// Group 5: extension (jar|pom|war)
		mavenRe: regexp.MustCompile(`\.m2/repository/(.+)/([^/]+)/([^/]+)/([^/]+)\.(jar|pom|war)$`),

		// Matches: .../.gradle/caches/modules-2/files-2.1/com.google.guava/guava/31.1-jre/hash/guava-31.1-jre.jar
		// Group 1: groupId (com.google.guava)
		// Group 2: artifactId (guava)
		// Group 3: version (31.1-jre)
		// Group 4: filename (guava-31.1-jre)
		// Group 5: extension (jar|pom|war)
		// The dot in "files-2.1" is escaped so it only matches a literal dot.
		gradleRe: regexp.MustCompile(`\.gradle/caches/modules-2/files-2\.1/([^/]+)/([^/]+)/([^/]+)/[^/]+/([^/]+)\.(jar|pom|war)$`),
	}
}

// Name returns the resolver identifier ("java").
func (r *JavaResolver) Name() string {
	return "java"
}

// Resolve turns files from a Maven repository or Gradle cache into packages.
//
// Every file whose cleaned path yields Maven coordinates contributes to the
// package identified by pkg:maven/<groupId>/<artifactId>@<version>; files that
// resolve to the same PURL (e.g. the .pom and the .jar) produce one package
// whose Hashes is the union of the files' hashes, with later files winning on
// duplicate keys. Files that do not resolve are returned in remainingFiles in
// their original order.
//
// The hash maps of the input files are never modified: the package keeps its
// own copy.
func (r *JavaResolver) Resolve(files []FileInfo) (packages []PackageInfo, remainingFiles []FileInfo) {
	seen := make(map[string]int) // purl -> index in packages

	for _, f := range files {
		groupID, artifactID, version, ok := r.coordinates(path.Clean(f.Path))
		if !ok {
			remainingFiles = append(remainingFiles, f)
			continue
		}

		purl := "pkg:maven/" + groupID + "/" + artifactID + "@" + version

		if idx, dup := seen[purl]; dup {
			// Another file of an already known package: merge its hashes.
			if packages[idx].Hashes == nil {
				packages[idx].Hashes = make(map[string]string, len(f.Hashes))
			}
			for k, v := range f.Hashes {
				packages[idx].Hashes[k] = v
			}
			continue
		}

		// Copy instead of sharing f.Hashes, otherwise a later merge would
		// write into the caller's map. A nil map stays nil.
		var hashes map[string]string
		if f.Hashes != nil {
			hashes = make(map[string]string, len(f.Hashes))
			for k, v := range f.Hashes {
				hashes[k] = v
			}
		}

		seen[purl] = len(packages)
		packages = append(packages, PackageInfo{
			Name:      groupID + ":" + artifactID,
			Version:   version,
			Ecosystem: "maven",
			PURL:      purl,
			Hashes:    hashes,
			FoundBy:   "attestation:java",
		})
	}

	return packages, remainingFiles
}

// coordinates extracts Maven coordinates from an already cleaned path.
// ok is false when the path matches neither layout or when the file name is
// not exactly "<artifactId>-<version>".
func (r *JavaResolver) coordinates(p string) (groupID, artifactID, version string, ok bool) {
	if m := r.mavenRe.FindStringSubmatch(p); len(m) == 6 {
		// m[1]: group/path, m[2]: artifactId, m[3]: version, m[4]: filename, m[5]: ext
		if m[4] != m[2]+"-"+m[3] {
			return "", "", "", false
		}
		return strings.ReplaceAll(m[1], "/", "."), m[2], m[3], true
	}

	if m := r.gradleRe.FindStringSubmatch(p); len(m) == 6 {
		// m[1]: groupId, m[2]: artifactId, m[3]: version, m[4]: filename, m[5]: ext
		if m[4] != m[2]+"-"+m[3] {
			return "", "", "", false
		}
		return m[1], m[2], m[3], true
	}

	return "", "", "", false
}

// CreateFileFilters builds one file filter per Maven package in packages.
// Packages from other ecosystems, and Maven packages whose Name is not of the
// form "groupId:artifactId", are skipped. Only the first ':' separates the
// two parts. The result is nil when nothing qualifies.
func (r *JavaResolver) CreateFileFilters(packages []PackageInfo) []PackageFileFilter {
	var filters []PackageFileFilter

	for i := range packages {
		pkg := &packages[i]
		if pkg.Ecosystem != "maven" {
			continue
		}

		coords := strings.SplitN(pkg.Name, ":", 2)
		if len(coords) != 2 {
			continue
		}

		filter := &javaPackageFilter{
			groupId:    coords[0],
			artifactId: coords[1],
			version:    pkg.Version,
		}
		filters = append(filters, filter)
	}

	return filters
}

// javaPackageFilter recognises files that belong to one Maven package, given
// by its groupId, artifactId and version.
type javaPackageFilter struct {
	groupId    string
	artifactId string
	version    string
}

// Matches reports whether p lies inside the package's directory in a local
// Maven repository (/.m2/repository/<group/path>/<artifactId>/<version>/) or
// in the Gradle module cache
// (/.gradle/caches/modules-2/files-2.1/<groupId>/<artifactId>/<version>/).
//
// The comparison is case-insensitive: the path and the coordinates are both
// lowercased. Lowercasing only the path would make every package with an
// uppercase letter in its coordinates (e.g. "HikariCP", "5.6.0.Final")
// unmatchable.
func (f *javaPackageFilter) Matches(p string) bool {
	p = strings.ToLower(p)
	groupID := strings.ToLower(f.groupId)
	groupPath := strings.ReplaceAll(groupID, ".", "/")
	suffix := "/" + strings.ToLower(f.artifactId) + "/" + strings.ToLower(f.version) + "/"

	// Maven match
	if strings.Contains(p, "/.m2/repository/"+groupPath+suffix) {
		return true
	}

	// Gradle match
	return strings.Contains(p, "/.gradle/caches/modules-2/files-2.1/"+groupID+suffix)
}
Loading