Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions artifact-malware-quarantine-guard/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
frames/
__pycache__/
*.tmp
33 changes: 33 additions & 0 deletions artifact-malware-quarantine-guard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Artifact Malware Quarantine Guard

A self-contained slice of the Scientific/Engineering Data & Code Hosting work, built for issue #14.

This module evaluates hosted uploads before they are exposed through metadata previews, reproduce-run buttons, API access, or export bundles. It uses synthetic artifact records only and does not call external scanners, storage systems, payment systems, or private services.

## What It Checks

- Denylisted checksums and malicious scan verdicts
- Stale or missing malware scan evidence
- Archive expansion, entry-count, and depth signals for archive bombs
- Macro-enabled spreadsheets
- Pickle/joblib model artifacts that can execute code when deserialized
- Notebook outputs with embedded scripts
- FAIR/DataCite/schema.org release evidence for clean artifacts

## Commands

```bash
npm run check
npm test
npm run demo
npm run demo:video
```

`npm run demo` writes deterministic JSON, Markdown, and SVG reviewer artifacts under `reports/`. `npm run demo:video` renders `reports/demo.mp4` from local synthetic frames.

## Safety

- Synthetic sample data only
- No live malware scanning, upload storage, or network calls
- No credentials, tokens, private research files, or payout data
- Release decisions are guard outputs, not production enforcement actions
24 changes: 24 additions & 0 deletions artifact-malware-quarantine-guard/acceptance-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Acceptance Notes

- Adds `artifact-malware-quarantine-guard/` as an independent module.
- Keeps all inputs synthetic and local.
- Uses dependency-free Node.js logic for deterministic guard decisions.
- Covers risky, clean, and stale-scan batches with tests.
- Generates reviewer artifacts:
  - `reports/risky-packet.json`
  - `reports/clean-packet.json`
  - `reports/stale-scan-packet.json`
  - `reports/quarantine-report.md`
  - `reports/summary.svg`
  - `reports/demo.mp4`

## Local Validation

Run:

```bash
npm run check
npm test
npm run demo
npm run demo:video
```
82 changes: 82 additions & 0 deletions artifact-malware-quarantine-guard/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
const fs = require('fs');
const path = require('path');

const { assessArtifactBatch } = require('./index');
const { riskyBatch, cleanBatch, staleScanBatch } = require('./sample-data');

// Directory that receives every generated reviewer artifact.
const reportsDir = path.join(__dirname, 'reports');
fs.mkdirSync(reportsDir, { recursive: true });

// Writes one report file into the reports directory.
const writeReport = (fileName, contents) => {
  fs.writeFileSync(path.join(reportsDir, fileName), contents);
};

// [output file name, assessed packet] pairs, kept in reporting order.
const packets = [
  ['risky-packet.json', riskyBatch],
  ['clean-packet.json', cleanBatch],
  ['stale-scan-packet.json', staleScanBatch]
].map(([fileName, batch]) => [fileName, assessArtifactBatch(batch)]);

// 1. One pretty-printed JSON file per packet.
packets.forEach(([fileName, packet]) => {
  writeReport(fileName, `${JSON.stringify(packet, null, 2)}\n`);
});

// 2. Combined Markdown table and SVG summary.
writeReport('quarantine-report.md', renderMarkdown(packets));
writeReport('summary.svg', renderSvg(packets));

// 3. Console summary, printed only after every file has been written.
packets.forEach(([fileName, packet]) => {
  const digestPrefix = packet.auditDigest.slice(0, 12);
  console.log(`${fileName}: ${packet.status}; findings=${packet.findings.length}; digest=${digestPrefix}`);
});

/**
 * Renders the Markdown quarantine report: a title, one table row per packet
 * and a closing note.
 *
 * @param {Array<[string, object]>} packetRows - `[fileName, packet]` pairs; each
 *   packet supplies `status`, `releaseLanes` and `findings`.
 * @returns {string} Markdown text terminated by a newline.
 */
function renderMarkdown(packetRows) {
  const heading = [
    '# Artifact Malware Quarantine Report',
    '',
    '| Packet | Status | Preview | Reproduce | API | Export | Findings |',
    '| --- | --- | --- | --- | --- | --- | --- |'
  ];

  const tableRows = packetRows.map(([fileName, packet]) => {
    const { preview, reproduce, apiAccess, export: exportLane } = packet.releaseLanes;
    const findingCodes = packet.findings.map(({ code }) => code).join(', ');
    const cells = [
      fileName,
      packet.status,
      preview,
      reproduce,
      apiAccess,
      exportLane,
      // An empty code list is shown as "none" rather than a blank cell.
      findingCodes || 'none'
    ];
    return `| ${cells.join(' | ')} |`;
  });

  const closing = [
    '',
    'All packets use synthetic artifact records and deterministic SHA-256 audit digests.'
  ];

  return `${[...heading, ...tableRows, ...closing].join('\n')}\n`;
}

/**
 * Renders the SVG summary card: a fixed header plus one status row per packet.
 *
 * @param {Array<[string, object]>} packetRows - `[fileName, packet]` pairs; only
 *   the packet is used (`batchId`, `status`, `findings`, `auditDigest`).
 * @returns {string} SVG document text terminated by a newline.
 */
function renderSvg(packetRows) {
  // Status-to-indicator colour; any other status falls back to green.
  const statusColors = new Map([
    ['quarantine_uploads', '#dc2626'],
    ['hold_for_rescan', '#d97706']
  ]);

  const groups = packetRows.map(([, packet], index) => {
    // Rows start at y=105 and are spaced 72 units apart.
    const offsetY = 105 + index * 72;
    const fill = statusColors.has(packet.status) ? statusColors.get(packet.status) : '#16a34a';
    const title = escapeXml(packet.batchId);
    const caption = `${escapeXml(packet.status)} | findings ${packet.findings.length} | digest ${packet.auditDigest.slice(0, 16)}`;

    // The leading empty entry makes every group start on its own line.
    return [
      '',
      `<g transform="translate(48 ${offsetY})">`,
      '<rect width="1104" height="50" rx="6" fill="#f8fafc" stroke="#cbd5e1"/>',
      `<circle cx="28" cy="25" r="11" fill="${fill}"/>`,
      `<text x="58" y="21" font-size="18" font-family="Arial" fill="#0f172a">${title}</text>`,
      `<text x="58" y="39" font-size="13" font-family="Arial" fill="#475569">${caption}</text>`,
      '</g>'
    ].join('\n');
  });

  const documentLines = [
    '<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="360" viewBox="0 0 1200 360">',
    ' <rect width="1200" height="360" fill="#e2e8f0"/>',
    ' <text x="48" y="52" font-size="31" font-family="Arial" font-weight="700" fill="#0f172a">Artifact Malware Quarantine Guard</text>',
    ' <text x="48" y="80" font-size="16" font-family="Arial" fill="#334155">Preview, reproduce, API, and export lanes are gated before hosted research artifacts are released.</text>',
    groups.join(''),
    '</svg>',
    ''
  ];
  return documentLines.join('\n');
}

/**
 * Escapes a value for safe inclusion in XML/SVG text or attribute values.
 *
 * All five predefined XML entities are covered, including the apostrophe, so
 * the result is safe inside single-quoted as well as double-quoted attributes.
 *
 * @param {*} value - Any value; it is converted with `String()` first.
 * @returns {string} The escaped text.
 */
function escapeXml(value) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;'
  };
  // A single pass means an ampersand produced by one replacement is never
  // escaped a second time.
  return String(value).replace(/[&<>"']/g, (character) => entities[character]);
}
217 changes: 217 additions & 0 deletions artifact-malware-quarantine-guard/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
const crypto = require('crypto');

// Baseline guard policy. assessArtifactBatch shallow-merges `batch.policy`
// over these values, so a batch can override any individual key.
const DEFAULT_POLICY = {
// Scan evidence older than this many hours (or missing) raises STALE_SCAN_EVIDENCE.
maxScanAgeHours: 24,
// Largest expandedBytes / compressedBytes ratio tolerated for an archive.
maxArchiveExpansionRatio: 20,
// Largest `archive.maxDepth` tolerated for an archive.
maxArchiveDepth: 8,
// Largest `archive.entryCount` tolerated for an archive.
maxArchiveEntries: 10000,
// Artifact `sha256` values that always raise DENYLISTED_CHECKSUM.
deniedChecksums: [],
// NOTE(review): not read by any code visible in this file — confirm intended use.
allowedChecksums: []
};

/**
 * Assesses every artifact in a batch and assembles the guard packet.
 *
 * @param {object} batch - Batch record; reads `policy`, `artifacts`, `now`,
 *   `batchId` and `projectId`.
 * @returns {object} Packet containing status, release lanes, findings,
 *   actions, metadata, FAIR signals, `assessedAt` and an `auditDigest`
 *   computed over all the other fields.
 */
function assessArtifactBatch(batch) {
  // Batch-level overrides win over the defaults, key by key.
  const overrides = batch.policy || {};
  const policy = { ...DEFAULT_POLICY, ...overrides };

  const findings = batch.artifacts
    .map((artifact) => assessArtifact(artifact, batch.now, policy))
    .flat();

  const countWhere = (predicate) => findings.filter(predicate).length;
  const blockerCount = countWhere(({ severity }) => severity === 'blocker');
  const staleScanCount = countWhere(({ code }) => code === 'STALE_SCAN_EVIDENCE');

  const status = chooseStatus(blockerCount, staleScanCount, findings.length);
  const releaseLanes = chooseReleaseLanes(blockerCount, staleScanCount);
  const actions = buildActions(findings);
  const metadata = buildMetadata(batch);
  const fairSignals = buildFairSignals(batch, findings);

  const packet = {
    batchId: batch.batchId,
    projectId: batch.projectId,
    status,
    releaseLanes,
    findings,
    actions,
    metadata,
    fairSignals,
    assessedAt: batch.now
  };

  // The digest is computed last so that it covers every other field.
  packet.auditDigest = digestPacket(packet);
  return packet;
}

/**
 * Evaluates one artifact record against the quarantine policy.
 *
 * Checks run in a fixed order: denylisted checksum, malicious scan verdict,
 * stale or missing scan evidence, archive bomb signals, macro-enabled
 * spreadsheet, unsafe model format, notebook embedded scripts.
 *
 * @param {object} artifact - Artifact record; reads `sha256`, `scan`,
 *   `archive`, `containsMacros`, `declaredMime`, `detectedMime`,
 *   `modelFormat` and `notebook`.
 * @param {string} now - Assessment timestamp, forwarded to `ageHours`.
 * @param {object} policy - Effective policy (same keys as DEFAULT_POLICY).
 * @returns {object[]} Findings in check order; empty when nothing is flagged.
 */
function assessArtifact(artifact, now, policy) {
  const findings = [];
  const report = (code, severity, message) => {
    findings.push(finding(artifact, code, severity, message));
  };

  // Hex SHA-256 digests are case-insensitive, so compare a canonical
  // lower-case, trimmed form; otherwise "ABC…" would slip past a denylist
  // entry stored as "abc…". Non-string values are compared unchanged.
  const canonicalChecksum = (value) => (typeof value === 'string' ? value.trim().toLowerCase() : value);
  if (policy.deniedChecksums.map(canonicalChecksum).includes(canonicalChecksum(artifact.sha256))) {
    report('DENYLISTED_CHECKSUM', 'blocker', 'Artifact checksum is on the denylist.');
  }

  if (artifact.scan?.verdict === 'malicious') {
    report(
      'MALWARE_SIGNATURE_MATCH',
      'blocker',
      `Malware scan reported ${artifact.scan.signatures?.join(', ') || 'a malicious signature'}.`
    );
  }

  // A null age means the scan timestamp (or `now`) is missing or unparseable.
  const scanAgeHours = ageHours(artifact.scan?.scannedAt, now);
  if (scanAgeHours === null || scanAgeHours > policy.maxScanAgeHours) {
    report('STALE_SCAN_EVIDENCE', 'hold', 'Scan evidence is missing or older than policy allows.');
  }

  if (artifact.archive) {
    const { expandedBytes, compressedBytes, entryCount, maxDepth } = artifact.archive;
    // Clamp the divisor to 1 so a zero-byte archive cannot divide by zero.
    const ratio = expandedBytes / Math.max(compressedBytes, 1);
    const looksLikeBomb =
      ratio > policy.maxArchiveExpansionRatio ||
      entryCount > policy.maxArchiveEntries ||
      maxDepth > policy.maxArchiveDepth;
    if (looksLikeBomb) {
      report(
        'ARCHIVE_EXPANSION_BOMB',
        'blocker',
        `Archive expands ${ratio.toFixed(1)}x with ${entryCount} entries and depth ${maxDepth}.`
      );
    }
  }

  // Check the detected MIME type as well as the declared one: the declared
  // value comes from the uploader and must not be able to hide a macro-enabled
  // file that content detection identified.
  const mimeEvidence = `${artifact.declaredMime || ''} ${artifact.detectedMime || ''}`;
  if (artifact.containsMacros || /macroEnabled/i.test(mimeEvidence)) {
    report('MACRO_ENABLED_SPREADSHEET', 'blocker', 'Macro-enabled spreadsheet must stay quarantined until manually reviewed.');
  }

  if (['pickle', 'joblib'].includes(String(artifact.modelFormat || '').toLowerCase())) {
    report('UNSAFE_MODEL_DESERIALIZATION', 'blocker', 'Model artifact uses a deserialization format that can execute code.');
  }

  if (artifact.notebook?.embeddedScripts?.length) {
    report('NOTEBOOK_EMBEDDED_SCRIPT', 'blocker', 'Notebook output contains embedded scripts that cannot be previewed safely.');
  }

  return findings;
}

/**
 * Builds a finding record for one artifact.
 *
 * @param {object} artifact - Artifact record; `id` and `name` are copied.
 * @param {string} code - Finding code, e.g. `STALE_SCAN_EVIDENCE`.
 * @param {string} severity - Severity label, e.g. `blocker` or `hold`.
 * @param {string} message - Human-readable explanation.
 * @returns {{artifactId: *, fileName: *, code: string, severity: string, message: string}}
 */
function finding(artifact, code, severity, message) {
  const { id: artifactId, name: fileName } = artifact;
  return { artifactId, fileName, code, severity, message };
}

/**
 * Computes how many hours elapsed between a scan timestamp and `now`.
 *
 * @param {string} scannedAt - Scan timestamp, parsed with `Date.parse`.
 * @param {string} now - Reference timestamp, parsed with `Date.parse`.
 * @returns {number|null} Elapsed hours, never negative; `null` when either
 *   timestamp is missing or cannot be parsed.
 */
function ageHours(scannedAt, now) {
  const MS_PER_HOUR = 60 * 60 * 1000;

  if (!scannedAt || !now) {
    return null;
  }

  const [scanTime, nowTime] = [scannedAt, now].map((stamp) => Date.parse(stamp));
  if ([scanTime, nowTime].some((time) => Number.isNaN(time))) {
    return null;
  }

  // A scan dated after `now` is reported as age 0 rather than a negative age.
  const elapsedHours = (nowTime - scanTime) / MS_PER_HOUR;
  return elapsedHours < 0 ? 0 : elapsedHours;
}

/**
 * Picks the batch status from the finding counts; the first rule with a
 * positive count wins.
 *
 * @param {number} blockerCount - Number of `blocker` findings.
 * @param {number} staleScanCount - Number of stale-scan findings.
 * @param {number} findingCount - Total number of findings.
 * @returns {string} One of `quarantine_uploads`, `hold_for_rescan`,
 *   `release_with_warnings` or `release_with_monitoring`.
 */
function chooseStatus(blockerCount, staleScanCount, findingCount) {
  // Ordered from most to least severe.
  const rules = [
    [blockerCount, 'quarantine_uploads'],
    [staleScanCount, 'hold_for_rescan'],
    [findingCount, 'release_with_warnings']
  ];
  const matched = rules.find(([count]) => count > 0);
  return matched ? matched[1] : 'release_with_monitoring';
}

/**
 * Decides what each release lane may expose for the batch.
 *
 * @param {number} blockerCount - Number of `blocker` findings.
 * @param {number} staleScanCount - Number of stale-scan findings.
 * @returns {{preview: string, reproduce: string, apiAccess: string, export: string}}
 *   A fresh lane map on every call.
 */
function chooseReleaseLanes(blockerCount, staleScanCount) {
  const lanes = (preview, reproduce, apiAccess, exportLane) => ({
    preview,
    reproduce,
    apiAccess,
    export: exportLane
  });

  // Any blocker shuts every lane.
  if (blockerCount > 0) {
    return lanes('blocked', 'blocked', 'blocked', 'blocked');
  }

  // Stale evidence still exposes metadata, but nothing that touches content.
  if (staleScanCount > 0) {
    return lanes('metadata_only', 'blocked', 'metadata_only', 'blocked');
  }

  return lanes('allowed', 'allowed', 'allowed', 'allowed');
}

/**
 * Derives the action list from the findings: quarantine actions first, then
 * rescan requests, each group de-duplicated and sorted by artifact id.
 *
 * @param {object[]} findings - Finding records (`artifactId`, `severity`, `code`).
 * @returns {string[]} Action strings; a single monitoring action when no
 *   quarantine or rescan is needed.
 */
function buildActions(findings) {
  // Unique artifact ids of the findings matching `predicate`, sorted.
  const sortedIdsWhere = (predicate) => {
    const uniqueIds = new Set(findings.filter(predicate).map((item) => item.artifactId));
    return [...uniqueIds].sort();
  };

  const quarantines = sortedIdsWhere((item) => item.severity === 'blocker')
    .map((id) => `quarantine:${id}`);
  const rescans = sortedIdsWhere((item) => item.code === 'STALE_SCAN_EVIDENCE')
    .map((id) => `request_rescan:${id}`);

  const actions = [...quarantines, ...rescans];
  return actions.length ? actions : ['release_with_continuous_monitoring'];
}

/**
 * Builds DataCite-style and schema.org-style metadata for the batch, taking
 * descriptive values from the first artifact.
 *
 * @param {object} batch - Batch record; reads `batchId` and `artifacts[0]`.
 * @returns {{dataCite: object, schemaOrg: object}} Metadata with fallbacks for
 *   a missing title, license, MIME type and keywords.
 */
function buildMetadata(batch) {
  // An empty batch is described with fallbacks only.
  const lead = batch.artifacts[0] || {};
  const details = lead.metadata;

  // Title preference: explicit title, then file name, then the batch id.
  const displayName = details?.title || lead.name || batch.batchId;
  const mimeType = lead.detectedMime || lead.declaredMime || 'application/octet-stream';

  const dataCite = {
    identifier: batch.batchId,
    resourceType: inferResourceType(lead),
    titles: [displayName],
    rights: details?.license || 'unspecified'
  };

  const schemaOrg = {
    '@type': 'Dataset',
    name: displayName,
    encodingFormat: mimeType,
    keywords: details?.keywords || []
  };

  return { dataCite, schemaOrg };
}

/**
 * Infers a coarse resource type from an artifact's detected MIME type and
 * file name.
 *
 * @param {object} artifact - Artifact record; reads `detectedMime` and `name`.
 * @returns {string} `Software`, `Dataset`, `Model` or `Research artifact`.
 */
function inferResourceType(artifact) {
  const evidence = `${artifact.detectedMime || ''} ${artifact.name || ''}`;

  // Notebooks must be tested before the dataset pattern: their MIME type is
  // `application/x-ipynb+json`, which the generic `json` match would otherwise
  // classify as a Dataset.
  if (/ipynb/i.test(evidence)) return 'Software';
  if (/parquet|csv|json|spreadsheet/i.test(evidence)) return 'Dataset';
  if (/python|r-|julia|javascript/i.test(evidence)) return 'Software';
  if (/model|pickle|onnx|tensorflow/i.test(evidence)) return 'Model';
  return 'Research artifact';
}

/**
 * Summarises FAIR (findable / accessible / interoperable / reusable) signals
 * for a batch.
 *
 * @param {object} batch - Batch record; reads `batchId`, `projectId` and `artifacts`.
 * @param {object[]} findings - Finding records for the batch.
 * @returns {{findable: boolean, accessible: boolean, interoperable: boolean, reusable: boolean}}
 */
function buildFairSignals(batch, findings) {
  const tally = (matches) => findings.reduce((total, item) => total + (matches(item) ? 1 : 0), 0);
  const blockers = tally((item) => item.severity === 'blocker');
  const staleScans = tally((item) => item.code === 'STALE_SCAN_EVIDENCE');

  // One artifact carrying both a license and at least one keyword is enough.
  const describedForReuse = batch.artifacts.some(({ metadata }) =>
    Boolean(metadata?.license && metadata?.keywords?.length)
  );
  const everyArtifactTyped = batch.artifacts.every(
    ({ detectedMime, declaredMime }) => Boolean(detectedMime || declaredMime)
  );

  const accessible = blockers === 0;
  return {
    findable: Boolean(batch.batchId && batch.projectId),
    accessible,
    interoperable: everyArtifactTyped,
    reusable: accessible && staleScans === 0 && describedForReuse
  };
}

/**
 * Computes the SHA-256 audit digest of a packet.
 *
 * @param {object} packet - Packet to digest; any existing `auditDigest` is
 *   blanked first so the digest never depends on itself.
 * @returns {string} Hex-encoded SHA-256 of the packet's stable serialization.
 */
function digestPacket(packet) {
  const withoutDigest = { ...packet, auditDigest: undefined };
  const canonicalText = stableStringify(withoutDigest);

  const hasher = crypto.createHash('sha256');
  hasher.update(canonicalText);
  return hasher.digest('hex');
}

/**
 * Serializes a value to JSON-like text with object keys in sorted order, so
 * that equal data always yields identical text.
 *
 * @param {*} value - Value to serialize; object properties whose value is
 *   `undefined` are omitted.
 * @returns {string|undefined} Serialized text; for non-object values this is
 *   whatever `JSON.stringify` returns.
 */
function stableStringify(value) {
  if (Array.isArray(value)) {
    const items = value.map((element) => stableStringify(element));
    return `[${items.join(',')}]`;
  }

  // Primitives and null go straight to JSON.stringify.
  if (value === null || typeof value !== 'object') {
    return JSON.stringify(value);
  }

  const presentKeys = Object.keys(value).filter((key) => value[key] !== undefined);
  presentKeys.sort();

  const members = [];
  for (const key of presentKeys) {
    members.push(`${JSON.stringify(key)}:${stableStringify(value[key])}`);
  }
  return `{${members.join(',')}}`;
}

// Public API: only the batch-level entry point is exported; the other
// functions in this file stay module-private.
module.exports = {
assessArtifactBatch
};
Loading