diff --git a/packages/cli/skills/dkg-node/SKILL.md b/packages/cli/skills/dkg-node/SKILL.md
index c8542c769..10a87061b 100644
--- a/packages/cli/skills/dkg-node/SKILL.md
+++ b/packages/cli/skills/dkg-node/SKILL.md
@@ -121,17 +121,29 @@ The token is configured in the node's config file or provided at startup.
 - **Note:** `subGraphName` is supported for legacy routing only and cannot be combined with `view`
 - `POST /api/query-remote` — query a remote peer via P2P
 
-### Working Memory (WM) — Private assertions (🚧 Planned)
+### Working Memory (WM) — Private assertions
 
-> The following WM assertion endpoints are planned for a future release:
+WM assertions are your agent-local drafts — private to you, readable and
+writable only by your peer ID, never gossiped. Use them to stage knowledge
+before sharing it to SWM (team) or promoting it to VM (chain-anchored).
 
 - `POST /api/assertion/create` — create a named private assertion
-- `PUT /api/assertion/{name}` — write triples to an assertion
-- `POST /api/assertion/{name}/import` — import N-Triples/Turtle/JSON-LD
-- `POST /api/assertion/{name}/import-file` — import PDF/DOCX/Markdown (multipart)
-- `GET /api/assertion/{name}` — read assertion contents
-- `DELETE /api/assertion/{name}` — delete assertion
-- `POST /api/assertion/{name}/promote` — promote assertion to SWM
+  Body: `{ "contextGraphId": "...", "name": "...", "subGraphName"?: "..." }`
+- `POST /api/assertion/{name}/write` — write triples to an assertion
+  Body: `{ "contextGraphId": "...", "quads": [...], "subGraphName"?: "..." }`
+- `POST /api/assertion/{name}/query` — read assertion contents as quads
+  Body: `{ "contextGraphId": "...", "subGraphName"?: "..." }`
+- `POST /api/assertion/{name}/promote` — promote assertion triples to SWM
+  Body: `{ "contextGraphId": "...", "entities"?: [...] | "all", "subGraphName"?: "..." }`
+- `POST /api/assertion/{name}/discard` — drop the assertion graph
+  Body: `{ "contextGraphId": "...", "subGraphName"?: "..." }`
+- `POST /api/assertion/{name}/import-file` — import a document (multipart/form-data) — see §7
+- `GET /api/assertion/{name}/extraction-status?contextGraphId=...` — poll the status of an import-file extraction job
+
+> If `subGraphName` is provided but the sub-graph is not registered in the CG's
+> `_meta` graph, all assertion operations throw
+> `Sub-graph "{name}" has not been registered in context graph "{id}". Call createSubGraph() first.`
+> Create the sub-graph before targeting it.
 
 ## 6. Context Graphs
@@ -145,22 +157,83 @@ Context Graphs are scoped knowledge domains with configurable access and governa
 - 🚧 `POST /api/context-graph/{id}/ontology` — add ontology *(planned)*
 - 🚧 `GET /api/context-graph/{id}/ontology` — list ontologies *(planned)*
 
-## 7. File Ingestion (🚧 Planned)
+## 7. File Ingestion
 
-> File ingestion via `import-file` depends on the Working Memory assertion API (§5)
-> and will be available when those endpoints ship. The extraction pipeline
-> infrastructure (MarkItDown converter) is already in place on the node.
+Upload a document (PDF, DOCX, HTML, CSV, Markdown, etc.) and let the node
+extract RDF triples into a WM assertion. The node runs a deterministic
+two-phase pipeline:
 
-Supported formats depend on available extraction pipelines (see Node Info §1).
-When available, usage will be:
+1. **Phase 1 (optional converter):** non-Markdown formats go through a
+   registered converter (e.g. MarkItDown for PDF/DOCX/HTML) which produces
+   a Markdown intermediate. `text/markdown` uploads skip Phase 1 — the raw
+   file IS the intermediate.
+2. **Phase 2 (structural extractor):** the Markdown intermediate is parsed
+   for YAML frontmatter, wikilinks (`[[Target]]`), hashtags (`#keyword`),
+   Dataview inline fields (`key:: value`), and heading structure. No LLM —
+   deterministic, node-side, no external calls.
+
+The extracted triples are written to the target assertion graph via the
+same path as `POST /api/assertion/{name}/write`. Agents can then query,
+promote, or publish them like any other assertion content.
+
+**Supported formats:** see Node Info §1 for the list of registered
+extraction pipelines on your specific node. `text/markdown` is always
+supported (no converter needed).
+
+### Request
+
+`POST /api/assertion/{name}/import-file` with `Content-Type: multipart/form-data`:
+
+| Field            | Required | Description                                                               |
+|------------------|----------|---------------------------------------------------------------------------|
+| `file`           | yes      | The document bytes                                                        |
+| `contextGraphId` | yes      | Target context graph                                                      |
+| `contentType`    | no       | Override the file part's Content-Type header                             |
+| `ontologyRef`    | no       | CG `_ontology` URI for guided Phase 2 extraction                          |
+| `subGraphName`   | no       | Target sub-graph inside the CG (must be registered via `createSubGraph`)  |
+
+### Example
 
 ```bash
-curl -X POST $BASE_URL/api/assertion/my-assertion/import-file \
+curl -X POST $BASE_URL/api/assertion/climate-report/import-file \
   -H "Authorization: Bearer $TOKEN" \
-  -F "file=@paper.pdf" \
-  -F "contextGraph=my-context-graph"
+  -F "file=@climate-2026.md;type=text/markdown" \
+  -F "contextGraphId=research"
+```
+
+### Response
+
+```json
+{
+  "assertionUri": "did:dkg:context-graph:research/assertion/0xAgentAddr/climate-report",
+  "fileHash": "sha256:a1b2c3...",
+  "detectedContentType": "text/markdown",
+  "extraction": {
+    "status": "completed",
+    "tripleCount": 14,
+    "pipelineUsed": "text/markdown",
+    "mdIntermediateHash": "sha256:a1b2c3..."
+  }
+}
 ```
 
+### Extraction statuses
+
+- `completed` — Phase 1 (if needed) and Phase 2 both ran; triples were written to the assertion graph
+- `skipped` — no converter is registered for the file's content type; the file is stored in the file store but no triples were written. Agents can still reference the file via its `fileHash`
+- `failed` — one of the phases threw an error; check the `error` field in the response. The file is still stored; no triples written
+
+For synchronous extractions (the V10.0 default) the response carries the
+final status immediately. To re-query later without holding the original
+response, use:
+
+```bash
+curl "$BASE_URL/api/assertion/climate-report/extraction-status?contextGraphId=research" \
+  -H "Authorization: Bearer $TOKEN"
+```
+
+Returns the same `{ status, fileHash, pipelineUsed, tripleCount, ... }` shape from the in-memory extraction status tracker, or 404 if no import-file has been run for that assertion.
+
 ## 8. 
Node Administration - `GET /api/status` (PUBLIC) โ€” node status, peer ID, version, connections diff --git a/packages/cli/src/daemon.ts b/packages/cli/src/daemon.ts index 7c011405a..343f7acc8 100644 --- a/packages/cli/src/daemon.ts +++ b/packages/cli/src/daemon.ts @@ -12,7 +12,7 @@ import { fileURLToPath } from 'node:url'; import { stat } from 'node:fs/promises'; import { ethers } from 'ethers'; import { DKGAgent, loadOpWallets } from '@origintrail-official/dkg-agent'; -import { computeNetworkId, createOperationContext, DKGEvent, Logger, PayloadTooLargeError, GET_VIEWS, validateSubGraphName, validateAssertionName, validateContextGraphId, isSafeIri, contextGraphSharedMemoryUri } from '@origintrail-official/dkg-core'; +import { computeNetworkId, createOperationContext, DKGEvent, Logger, PayloadTooLargeError, GET_VIEWS, validateSubGraphName, validateAssertionName, validateContextGraphId, isSafeIri, contextGraphSharedMemoryUri, contextGraphAssertionUri } from '@origintrail-official/dkg-core'; import { DashboardDB, MetricsCollector, @@ -54,7 +54,10 @@ import { import { startPublisherRuntimeIfEnabled, createPublisherInspectorFromStore, type PublisherRuntime, type PublisherInspector } from './publisher-runner.js'; import { loadTokens, httpAuthGuard, extractBearerToken } from './auth.js'; import { ExtractionPipelineRegistry } from '@origintrail-official/dkg-core'; -import { MarkItDownConverter, isMarkItDownAvailable } from './extraction/index.js'; +import { MarkItDownConverter, isMarkItDownAvailable, extractFromMarkdown } from './extraction/index.js'; +import { type ExtractionStatusRecord, getExtractionStatusRecord, setExtractionStatusRecord } from './extraction-status.js'; +import { FileStore } from './file-store.js'; +import { parseBoundary, parseMultipart, MultipartParseError } from './http/multipart.js'; import { handleCapture, EpcisValidationError, handleEventsQuery, EpcisQueryError, type Publisher as EpcisPublisher } from '@origintrail-official/dkg-epcis'; import { readFileSync } from 'node:fs'; @@ -143,6 +146,11 @@ export function parseRequiredSignatures(raw: unknown): { value: number } | { err return { value: raw }; } +function normalizeDetectedContentType(contentType: string | undefined): string { + const normalized = contentType?.split(';', 1)[0]?.trim().toLowerCase(); + return normalized && normalized.length > 0 ? normalized : 'application/octet-stream'; +} + const lastUpdateCheck = { upToDate: true, checkedAt: 0, latestCommit: '', latestVersion: '' }; let isUpdating = false; @@ -814,11 +822,27 @@ async function runDaemonInner(foreground: boolean, config: Awaited(); + // --- HTTP API --- const rateLimiter = new HttpRateLimiter( @@ -927,6 +951,8 @@ async function runDaemonInner(foreground: boolean, config: Awaited, publisherInspector: PublisherInspector, ): Promise { const url = new URL(req.url ?? '/', `http://${req.headers.host}`); @@ -1246,13 +1274,18 @@ async function handleRequest( const proto = req.headers['x-forwarded-proto'] ?? 'http'; const host = req.headers['x-forwarded-host'] ?? req.headers.host ?? `localhost:${config.listenPort ?? 9200}`; const baseUrl = `${proto}://${host}`; + // text/markdown is always handled natively by the import-file route + // (skip Phase 1, run the Phase 2 markdown extractor directly), even when + // no Phase 1 converter is registered. Surface it in the discovery list so + // skill-driven clients see Markdown ingestion as supported regardless of + // converter availability. 
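+      // Illustrative only: a node whose sole registered Phase 1 converter is
+      // MarkItDown would advertise something like ['text/markdown',
+      // 'application/pdf', ...]; the exact list depends on which converters
+      // are registered at startup.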
const pipelines = extractionRegistry.availableContentTypes(); const content = buildSkillMd({ version: nodeVersion, baseUrl, peerId: agent.peerId, nodeRole: config.nodeRole ?? 'edge', - extractionPipelines: [...new Set(pipelines)], + extractionPipelines: [...new Set(['text/markdown', ...pipelines])], }); const etag = skillEtag(content); if (req.headers['if-none-match'] === etag) { @@ -2202,6 +2235,344 @@ async function handleRequest( } } + // POST /api/assertion/:name/import-file (multipart/form-data) + // file (required): the uploaded document bytes + // contextGraphId (required): target context graph + // contentType (optional): override the file part's Content-Type + // ontologyRef (optional): CG _ontology URI for guided Phase 2 extraction + // subGraphName (optional): target sub-graph inside the CG + // + // Orchestration: + // 1. Parse multipart, store original file in file store โ†’ fileHash + // 2. Resolve detectedContentType (explicit field > multipart content-type) + // 3. If content type is text/markdown: skip Phase 1, use raw bytes as mdIntermediate + // Else if a converter is registered: run Phase 1, store mdIntermediate โ†’ mdIntermediateHash + // Else: graceful degrade โ€” return extraction.status="skipped", no triples written + // 4. Run Phase 2 markdown extractor on the mdIntermediate โ†’ triples + provenance + // 5. Write triples + provenance to the assertion graph via agent.assertion.write + // 6. Record the extraction status in the in-memory Map, return ImportFileResponse + if (req.method === 'POST' && path.startsWith('/api/assertion/') && path.endsWith('/import-file')) { + const assertionName = safeDecodeURIComponent(path.slice('/api/assertion/'.length, -'/import-file'.length), res); + if (assertionName === null) return; + const nameVal = validateAssertionName(assertionName); + if (!nameVal.valid) return jsonResponse(res, 400, { error: `Invalid assertion name: ${nameVal.reason}` }); + + const boundary = parseBoundary(req.headers['content-type']); + if (!boundary) { + return jsonResponse(res, 400, { error: 'Request must be multipart/form-data with a boundary' }); + } + + let body: Buffer; + try { + body = await readBodyBuffer(req, MAX_UPLOAD_BYTES); + } catch (err: any) { + if (err instanceof PayloadTooLargeError) throw err; + return jsonResponse(res, 400, { error: `Failed to read request body: ${err.message}` }); + } + + let fields; + try { + fields = parseMultipart(body, boundary); + } catch (err: any) { + if (err instanceof MultipartParseError) { + return jsonResponse(res, 400, { error: `Malformed multipart body: ${err.message}` }); + } + throw err; + } + + const filePart = fields.find(f => f.name === 'file' && f.filename !== undefined); + if (!filePart) { + return jsonResponse(res, 400, { error: 'Missing required "file" field in multipart body' }); + } + const textField = (name: string): string | undefined => { + const f = fields.find(x => x.name === name && x.filename === undefined); + return f ? f.content.toString('utf-8') : undefined; + }; + const contextGraphId = textField('contextGraphId'); + const contentTypeOverrideRaw = textField('contentType'); + // Treat blank (`contentType=` with empty/whitespace value) as absent so we + // fall through to the file part's own Content-Type header instead of + // downgrading a real text/markdown / application/pdf upload to + // application/octet-stream and silently skipping extraction. + const contentTypeOverride = + contentTypeOverrideRaw && contentTypeOverrideRaw.trim().length > 0 + ? 
contentTypeOverrideRaw + : undefined; + const ontologyRef = textField('ontologyRef'); + const subGraphName = textField('subGraphName'); + + if (!validateRequiredContextGraphId(contextGraphId, res)) return; + if (!validateOptionalSubGraphName(subGraphName, res)) return; + + const detectedContentType = normalizeDetectedContentType(contentTypeOverride ?? filePart.contentType); + + if (subGraphName) { + try { + const registeredSubGraphs: Array<{ name: string }> = await agent.listSubGraphs(contextGraphId!); + if (!registeredSubGraphs.some(subGraph => subGraph.name === subGraphName)) { + return jsonResponse(res, 400, { error: unregisteredSubGraphError(contextGraphId!, subGraphName) }); + } + } catch (err: any) { + return jsonResponse(res, 500, { error: `Failed to verify sub-graph registration: ${err.message}` }); + } + } + + // Persist the original upload to the file store. + let fileStoreEntry; + try { + fileStoreEntry = await fileStore.put(filePart.content, detectedContentType); + } catch (err: any) { + return jsonResponse(res, 500, { error: `Failed to store uploaded file: ${err.message}` }); + } + + const assertionUri = contextGraphAssertionUri( + contextGraphId!, + agent.peerId, + assertionName, + subGraphName, + ); + const startedAt = new Date().toISOString(); + + // โ”€โ”€ Phase 1: converter lookup + MD intermediate resolution โ”€โ”€ + // text/markdown is deliberately NOT a registered converter content type. + // The raw uploaded bytes ARE the Markdown intermediate, so Phase 1 is skipped. + // For any other content type, look up a converter; if none is registered, + // gracefully degrade (store the file, skip extraction, return status=skipped). + let mdIntermediate: string | null = null; + let pipelineUsed: string | null = null; + let mdIntermediateHash: string | undefined; + const respondWithImportFileResponse = (statusCode: number, extraction: ImportFileExtractionPayload) => + jsonResponse( + res, + statusCode, + buildImportFileResponse({ + assertionUri, + fileHash: fileStoreEntry.hash, + detectedContentType, + extraction, + }), + ); + const recordInProgressExtraction = (): void => { + setExtractionStatusRecord(extractionStatus, assertionUri, { + status: 'in_progress', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed, + tripleCount: 0, + ...(mdIntermediateHash ? { mdIntermediateHash } : {}), + startedAt, + }); + }; + const recordFailedExtraction = ( + error: string, + tripleCount: number, + failedPipelineUsed: string | null = pipelineUsed, + ): ExtractionStatusRecord => { + const failedRecord: ExtractionStatusRecord = { + status: 'failed', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed: failedPipelineUsed, + tripleCount, + ...(mdIntermediateHash ? { mdIntermediateHash } : {}), + error, + startedAt, + completedAt: new Date().toISOString(), + }; + setExtractionStatusRecord(extractionStatus, assertionUri, failedRecord); + return failedRecord; + }; + const respondWithFailedExtraction = ( + statusCode: number, + error: string, + tripleCount: number, + failedPipelineUsed: string | null = pipelineUsed, + ) => { + const failedRecord = recordFailedExtraction(error, tripleCount, failedPipelineUsed); + return respondWithImportFileResponse(statusCode, { + status: 'failed', + tripleCount, + pipelineUsed: failedRecord.pipelineUsed, + ...(failedRecord.mdIntermediateHash ? 
{ mdIntermediateHash: failedRecord.mdIntermediateHash } : {}), + error, + }); + }; + + recordInProgressExtraction(); + + if (detectedContentType === 'text/markdown') { + mdIntermediate = filePart.content.toString('utf-8'); + pipelineUsed = 'text/markdown'; + recordInProgressExtraction(); + } else { + const converter = extractionRegistry.get(detectedContentType); + if (converter) { + try { + const { mdIntermediate: md } = await converter.extract({ + filePath: fileStoreEntry.path, + contentType: detectedContentType, + ontologyRef, + agentDid: `did:dkg:agent:${agent.peerId}`, + }); + mdIntermediate = md; + pipelineUsed = detectedContentType; + const mdEntry = await fileStore.put(Buffer.from(md, 'utf-8'), 'text/markdown'); + mdIntermediateHash = mdEntry.hash; + recordInProgressExtraction(); + } catch (err: any) { + return respondWithFailedExtraction(500, `Phase 1 converter failed: ${err.message}`, 0, detectedContentType); + } + } + } + + // โ”€โ”€ Graceful degrade: no converter registered and not text/markdown โ”€โ”€ + // Store the file blob, return status=skipped, no triples written. + if (mdIntermediate === null) { + const skippedRecord: ExtractionStatusRecord = { + status: 'skipped', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed: null, + tripleCount: 0, + startedAt, + completedAt: new Date().toISOString(), + }; + setExtractionStatusRecord(extractionStatus, assertionUri, skippedRecord); + return respondWithImportFileResponse(200, { + status: 'skipped', + tripleCount: 0, + pipelineUsed: null, + }); + } + + // โ”€โ”€ Phase 2: markdown โ†’ triples + provenance โ”€โ”€ + let triples; + let provenance; + try { + const result = extractFromMarkdown({ + markdown: mdIntermediate, + agentDid: `did:dkg:agent:${agent.peerId}`, + ontologyRef, + documentIri: assertionUri, + }); + triples = result.triples; + provenance = result.provenance; + } catch (err: any) { + return respondWithFailedExtraction(500, `Phase 2 extraction failed: ${err.message}`, 0); + } + + // โ”€โ”€ Write triples + provenance to the assertion graph โ”€โ”€ + // The sub-graph registration check in assertionCreate/Write (finding 4 of #81) + // will throw if subGraphName is provided but unregistered โ€” that's intentional. + const allTriples = [...triples, ...provenance]; + try { + // Ensure the assertion graph exists even when Phase 2 yields zero triples, + // so a completed import always materializes the reported assertion URI. + try { + await agent.assertion.create( + contextGraphId!, + assertionName, + subGraphName ? { subGraphName } : undefined, + ); + } catch (err: any) { + const message = err?.message ?? String(err); + if (message.includes('already exists') || message.includes('duplicate') || message.includes('conflict')) { + // create() is idempotent when the graph already exists. + } else if ( + message.includes('has not been registered') + || message.includes('Invalid') + || message.includes('Unsafe') + ) { + return respondWithFailedExtraction(400, message, triples.length); + } else { + return respondWithFailedExtraction(500, message, triples.length); + } + } + if (allTriples.length > 0) { + await agent.assertion.write( + contextGraphId!, + assertionName, + allTriples.map(t => ({ subject: t.subject, predicate: t.predicate, object: t.object })), + subGraphName ? { subGraphName } : undefined, + ); + } + } catch (err: any) { + const message = err?.message ?? 
String(err); + if (message.includes('has not been registered')) { + return respondWithFailedExtraction(400, message, triples.length); + } + if (message.includes('Invalid') || message.includes('Unsafe')) { + return respondWithFailedExtraction(400, message, triples.length); + } + // Unexpected write-stage failure: record the failure on the extraction + // status map before rethrowing so /extraction-status doesn't stay stuck + // at in_progress when the top-level 500 handler takes over. + recordFailedExtraction(message, triples.length); + throw err; + } + + const completedRecord: ExtractionStatusRecord = { + status: 'completed', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed, + tripleCount: triples.length, + mdIntermediateHash, + startedAt, + completedAt: new Date().toISOString(), + }; + setExtractionStatusRecord(extractionStatus, assertionUri, completedRecord); + + return respondWithImportFileResponse(200, { + status: 'completed', + tripleCount: triples.length, + pipelineUsed, + ...(mdIntermediateHash ? { mdIntermediateHash } : {}), + }); + } + + // GET /api/assertion/:name/extraction-status?contextGraphId=...&subGraphName=... + // Returns the current extraction job state for the given assertion. + // Synchronous extractions (V10.0 default) return status="completed" immediately + // on the import-file response; this endpoint lets agents re-query the status + // later without having to hold the import-file response, and provides the hook + // for async extraction workflows in V10.x. + if (req.method === 'GET' && path.startsWith('/api/assertion/') && path.endsWith('/extraction-status')) { + const assertionName = safeDecodeURIComponent(path.slice('/api/assertion/'.length, -'/extraction-status'.length), res); + if (assertionName === null) return; + const nameVal = validateAssertionName(assertionName); + if (!nameVal.valid) return jsonResponse(res, 400, { error: `Invalid assertion name: ${nameVal.reason}` }); + const contextGraphId = url.searchParams.get('contextGraphId') ?? url.searchParams.get('paranetId'); + if (!validateRequiredContextGraphId(contextGraphId, res)) return; + const subGraphName = url.searchParams.get('subGraphName') ?? undefined; + if (!validateOptionalSubGraphName(subGraphName, res)) return; + + const assertionUri = contextGraphAssertionUri( + contextGraphId!, + agent.peerId, + assertionName, + subGraphName, + ); + const record = getExtractionStatusRecord(extractionStatus, assertionUri); + if (!record) { + return jsonResponse(res, 404, { + error: `No extraction record found for assertion "${assertionName}" in context graph "${contextGraphId}"`, + }); + } + return jsonResponse(res, 200, { + assertionUri, + status: record.status, + fileHash: record.fileHash, + detectedContentType: record.detectedContentType, + pipelineUsed: record.pipelineUsed, + tripleCount: record.tripleCount, + ...(record.mdIntermediateHash ? { mdIntermediateHash: record.mdIntermediateHash } : {}), + ...(record.error ? { error: record.error } : {}), + startedAt: record.startedAt, + ...(record.completedAt ? { completedAt: record.completedAt } : {}), + }); + } + // POST /api/shared-memory/conditional-write { contextGraphId, quads, conditions, subGraphName? 
} if (req.method === 'POST' && path === '/api/shared-memory/conditional-write') { const body = await readBody(req); @@ -3083,6 +3454,45 @@ function validateConditions(conditions: unknown, res: ServerResponse): boolean { const MAX_BODY_BYTES = 10 * 1024 * 1024; // 10 MB โ€” default for data-heavy endpoints (publish, update) const SMALL_BODY_BYTES = 256 * 1024; // 256 KB โ€” for settings, connect, chat, and other small payloads +const MAX_UPLOAD_BYTES = 50 * 1024 * 1024; // 50 MB โ€” for import-file document uploads (PDFs, DOCX, etc.) + +/** + * In-memory extraction job tracking record. Populated at import-file time + * and queried by the extraction-status endpoint. Records are kept in a + * bounded, TTL-pruned map keyed by the target assertion URI (which is + * unique per agent ร— contextGraph ร— assertionName ร— subGraphName). + */ +interface ImportFileExtractionPayload { + status: 'completed' | 'skipped' | 'failed'; + tripleCount: number; + pipelineUsed: string | null; + mdIntermediateHash?: string; + error?: string; +} + +function buildImportFileResponse(args: { + assertionUri: string; + fileHash: string; + detectedContentType: string; + extraction: ImportFileExtractionPayload; +}) { + return { + assertionUri: args.assertionUri, + fileHash: args.fileHash, + detectedContentType: args.detectedContentType, + extraction: { + status: args.extraction.status, + tripleCount: args.extraction.tripleCount, + pipelineUsed: args.extraction.pipelineUsed, + ...(args.extraction.mdIntermediateHash ? { mdIntermediateHash: args.extraction.mdIntermediateHash } : {}), + ...(args.extraction.error ? { error: args.extraction.error } : {}), + }, + }; +} + +function unregisteredSubGraphError(contextGraphId: string, subGraphName: string): string { + return `Sub-graph "${subGraphName}" has not been registered in context graph "${contextGraphId}". 
Call createSubGraph() first.`; +} function readBody(req: IncomingMessage, maxBytes = MAX_BODY_BYTES): Promise { @@ -3109,6 +3519,34 @@ function readBody(req: IncomingMessage, maxBytes = MAX_BODY_BYTES): Promise { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = []; + let total = 0; + let rejected = false; + const onData = (c: Buffer) => { + if (rejected) return; + total += c.length; + if (total > maxBytes) { + rejected = true; + req.removeListener('data', onData); + req.resume(); + setTimeout(() => req.destroy(), 5_000); + reject(new PayloadTooLargeError(maxBytes)); + return; + } + chunks.push(c); + }; + req.on('data', onData); + req.on('end', () => { if (!rejected) resolve(Buffer.concat(chunks)); }); + req.on('error', (err) => { if (!rejected) reject(err); }); + }); +} + // โ”€โ”€โ”€ CORS / rate-limit / validation helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ type CorsAllowlist = '*' | string[]; diff --git a/packages/cli/src/extraction-status.ts b/packages/cli/src/extraction-status.ts new file mode 100644 index 000000000..9f716432d --- /dev/null +++ b/packages/cli/src/extraction-status.ts @@ -0,0 +1,63 @@ +export interface ExtractionStatusRecord { + status: 'in_progress' | 'completed' | 'skipped' | 'failed'; + fileHash: string; + detectedContentType: string; + pipelineUsed: string | null; + tripleCount: number; + mdIntermediateHash?: string; + error?: string; + startedAt: string; + completedAt?: string; +} + +export const EXTRACTION_STATUS_TTL_MS = 24 * 60 * 60 * 1000; +export const MAX_EXTRACTION_STATUS_RECORDS = 1000; + +function extractionStatusSortKey(record: ExtractionStatusRecord): number { + const completedAtMs = record.completedAt ? Date.parse(record.completedAt) : Number.NaN; + if (Number.isFinite(completedAtMs)) return completedAtMs; + const startedAtMs = Date.parse(record.startedAt); + return Number.isFinite(startedAtMs) ? 
startedAtMs : 0; +} + +export function pruneExtractionStatusRecords( + extractionStatus: Map, + nowMs = Date.now(), +): void { + for (const [assertionUri, record] of extractionStatus.entries()) { + const ageRefMs = extractionStatusSortKey(record); + if (ageRefMs > 0 && nowMs - ageRefMs > EXTRACTION_STATUS_TTL_MS) { + extractionStatus.delete(assertionUri); + } + } + + if (extractionStatus.size <= MAX_EXTRACTION_STATUS_RECORDS) return; + + const oldestFirst = [...extractionStatus.entries()].sort( + ([, left], [, right]) => extractionStatusSortKey(left) - extractionStatusSortKey(right), + ); + + for (const [assertionUri, record] of oldestFirst) { + if (extractionStatus.size <= MAX_EXTRACTION_STATUS_RECORDS) break; + if (record.status !== 'in_progress') { + extractionStatus.delete(assertionUri); + } + } +} + +export function setExtractionStatusRecord( + extractionStatus: Map, + assertionUri: string, + record: ExtractionStatusRecord, +): void { + extractionStatus.set(assertionUri, record); + pruneExtractionStatusRecords(extractionStatus); +} + +export function getExtractionStatusRecord( + extractionStatus: Map, + assertionUri: string, +): ExtractionStatusRecord | undefined { + pruneExtractionStatusRecords(extractionStatus); + return extractionStatus.get(assertionUri); +} diff --git a/packages/cli/src/extraction/index.ts b/packages/cli/src/extraction/index.ts index a4b72e041..f139cb436 100644 --- a/packages/cli/src/extraction/index.ts +++ b/packages/cli/src/extraction/index.ts @@ -1 +1,6 @@ export { MarkItDownConverter, isMarkItDownAvailable, MARKITDOWN_CONTENT_TYPES } from './markitdown-converter.js'; +export { + extractFromMarkdown, + type MarkdownExtractInput, + type MarkdownExtractOutput, +} from './markdown-extractor.js'; diff --git a/packages/cli/src/extraction/markdown-extractor.ts b/packages/cli/src/extraction/markdown-extractor.ts new file mode 100644 index 000000000..953ed3fe7 --- /dev/null +++ b/packages/cli/src/extraction/markdown-extractor.ts @@ -0,0 +1,428 @@ +/** + * Phase 2 of document ingestion: deterministic structural extraction + * from a Markdown intermediate to RDF triples + provenance. + * + * This is the "Layer 1 structural" extraction defined by + * `19_MARKDOWN_CONTENT_TYPE.md` โ€” it runs without an LLM and produces + * triples from explicit Markdown/YAML structure only: + * + * - YAML frontmatter keys โ†’ subject properties + * - `type` frontmatter key โ†’ rdf:type + * - Wikilinks `[[Target]]` โ†’ schema:mentions + * - Hashtags `#keyword` โ†’ schema:keywords + * - Dataview `key:: value` inline fields โ†’ properties + * - Heading hierarchy โ†’ dkg:hasSection + * + * Every extracted triple gets a provenance record pointing to a + * `dkg:ExtractionProvenance` blank identifier so downstream consumers + * can distinguish structurally-derived triples from user-asserted ones. 
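+ *
+ * A minimal illustration of the Layer 1 mapping (prefixes abbreviated; exact
+ * subject IRIs come from resolveSubjectIri/slugify below):
+ *
+ *   ---
+ *   id: q4-report
+ *   type: Report
+ *   ---
+ *   # Q4 Report
+ *   Prepared by [[Finance Team]]. #quarterly
+ *
+ * yields roughly:
+ *   <urn:dkg:md:q4-report> rdf:type schema:Report
+ *   <urn:dkg:md:q4-report> schema:name "Q4 Report"
+ *   <urn:dkg:md:q4-report> schema:mentions <urn:dkg:md:finance-team>
+ *   <urn:dkg:md:q4-report> schema:keywords "quarterly"
+ * plus a dkg:ExtractionProvenance record describing the run.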
+ * + * Spec: 05_PROTOCOL_EXTENSIONS.md ยง6.5.2, 19_MARKDOWN_CONTENT_TYPE.md + */ + +import { createHash } from 'node:crypto'; +import { load as loadYaml } from 'js-yaml'; +import type { ExtractionQuad as Quad } from '@origintrail-official/dkg-core'; + +const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'; +const SCHEMA_NAME = 'http://schema.org/name'; +const SCHEMA_DESCRIPTION = 'http://schema.org/description'; +const SCHEMA_MENTIONS = 'http://schema.org/mentions'; +const SCHEMA_KEYWORDS = 'http://schema.org/keywords'; +const DKG_HAS_SECTION = 'http://dkg.io/ontology/hasSection'; +const DKG_EXTRACTION_PROVENANCE = 'http://dkg.io/ontology/ExtractionProvenance'; +const DKG_DERIVED_FROM = 'http://dkg.io/ontology/derivedFrom'; +const DKG_EXTRACTED_BY = 'http://dkg.io/ontology/extractedBy'; +const DKG_EXTRACTION_RULE = 'http://dkg.io/ontology/extractionRule'; +const DKG_EXTRACTED_AT = 'http://dkg.io/ontology/extractedAt'; +const PROV_WAS_GENERATED_BY = 'http://www.w3.org/ns/prov#wasGeneratedBy'; +const XSD_BOOLEAN = 'http://www.w3.org/2001/XMLSchema#boolean'; +const XSD_DATE = 'http://www.w3.org/2001/XMLSchema#date'; +const XSD_DATE_TIME = 'http://www.w3.org/2001/XMLSchema#dateTime'; +const XSD_DECIMAL = 'http://www.w3.org/2001/XMLSchema#decimal'; +const XSD_INTEGER = 'http://www.w3.org/2001/XMLSchema#integer'; + +export interface MarkdownExtractInput { + /** Markdown source text (the Phase 1 mdIntermediate). */ + markdown: string; + /** DID of the extracting agent, recorded in provenance. */ + agentDid: string; + /** Optional ontology URI (not yet used by Layer 1 โ€” reserved for Layer 2). */ + ontologyRef?: string; + /** + * Optional stable subject IRI for the document. When omitted, the extractor + * derives a subject from frontmatter `id` or the first H1 heading. + */ + documentIri?: string; + /** Optional timestamp for provenance (defaults to now). */ + now?: Date; +} + +export interface MarkdownExtractOutput { + /** Extracted RDF triples. */ + triples: Quad[]; + /** dkg:ExtractionProvenance quads for the extraction run. */ + provenance: Quad[]; + /** The subject IRI used for the document (useful to the caller for indexing). */ + subjectIri: string; +} + +/** + * Parse YAML frontmatter if present. Returns the parsed object and the + * remaining markdown body with frontmatter stripped. + */ +function splitFrontmatter(markdown: string): { frontmatter: Record | null; body: string } { + if (!markdown.startsWith('---')) { + return { frontmatter: null, body: markdown }; + } + // Match the opening --- and find the closing --- + const lines = markdown.split(/\r?\n/); + if (lines[0].trim() !== '---') { + return { frontmatter: null, body: markdown }; + } + let endIndex = -1; + for (let i = 1; i < lines.length; i++) { + if (lines[i].trim() === '---') { + endIndex = i; + break; + } + } + if (endIndex === -1) { + return { frontmatter: null, body: markdown }; + } + const yamlText = lines.slice(1, endIndex).join('\n'); + let parsed: unknown; + try { + parsed = loadYaml(yamlText); + } catch { + return { frontmatter: null, body: markdown }; + } + if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) { + return { frontmatter: null, body: markdown }; + } + const body = lines.slice(endIndex + 1).join('\n'); + return { frontmatter: parsed as Record, body }; +} + +/** Extract the text of the first level-1 heading, if any. */ +function findFirstH1(body: string): string | null { + const m = stripCodeFences(body).match(/^#\s+(.+?)\s*$/m); + return m ? 
m[1].trim() : null; +} + +/** + * Slugify a string for use in an IRI fragment. Keeps alphanumerics and hyphens. + */ +function slugify(input: string): string { + const slug = input + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 80); + if (slug.length > 0) return slug; + return `hash-${shortHash(input)}`; +} + +function shortHash(input: string): string { + return createHash('sha256').update(input).digest('hex').slice(0, 12); +} + +function typedLiteral(lexicalForm: string, datatypeIri: string): string { + return `${JSON.stringify(lexicalForm)}^^<${datatypeIri}>`; +} + +function normalizeSchemaLocalName(raw: string, kind: 'property' | 'class'): string | null { + const stripped = raw.trim().replace(/\(([^)]*)\)/g, '$1'); + if (stripped.length === 0) return null; + + const asciiTokens = stripped.match(/[A-Za-z0-9]+/g); + if (asciiTokens && asciiTokens.length > 0) { + return asciiTokens + .map((token, index) => { + if (kind === 'property' && index === 0) { + return token[0]!.toLowerCase() + token.slice(1); + } + return token[0]!.toUpperCase() + token.slice(1); + }) + .join(''); + } + + const encoded = encodeURIComponent(stripped); + return encoded.length > 0 ? encoded : null; +} + +/** + * Resolve a stable subject IRI for the document: + * 1. explicit `documentIri` argument, or + * 2. frontmatter `id` (if it looks like an IRI or a slug), or + * 3. slugified first H1 heading with an `urn:dkg:md:` prefix, or + * 4. stable fallback `urn:dkg:md:anonymous-{short-hash}` derived from the full body. + */ +function resolveSubjectIri( + input: MarkdownExtractInput, + frontmatter: Record | null, + body: string, +): string { + if (input.documentIri && input.documentIri.length > 0) return input.documentIri; + + const fmId = frontmatter?.['id']; + if (typeof fmId === 'string' && fmId.length > 0) { + if (/^(https?:|did:|urn:|_:)/.test(fmId)) return fmId; + return `urn:dkg:md:${slugify(fmId)}`; + } + + const h1 = findFirstH1(body); + if (h1) return `urn:dkg:md:${slugify(h1)}`; + + return `urn:dkg:md:anonymous-${shortHash(body)}`; +} + +/** Resolve a value from a frontmatter `type` field to a full IRI. */ +function resolveTypeIri(typeValue: unknown): string | null { + if (typeof typeValue !== 'string' || typeValue.length === 0) return null; + if (/^(https?:|did:|urn:)/.test(typeValue)) return typeValue; + // Treat bare identifiers as schema.org classes by convention (Report, Person, etc.) + const localName = normalizeSchemaLocalName(typeValue, 'class'); + return localName ? `http://schema.org/${localName}` : null; +} + +/** Resolve a frontmatter scalar value to a triple object literal or IRI. */ +function resolveFrontmatterValue(value: unknown): string | null { + if (value === null || value === undefined) return null; + if (typeof value === 'string') { + if (/^(https?:|did:|urn:)/.test(value)) return value; + return JSON.stringify(value); + } + if (value instanceof Date) { + if (Number.isNaN(value.getTime())) return null; + const isUtcDateOnly = + value.getUTCHours() === 0 + && value.getUTCMinutes() === 0 + && value.getUTCSeconds() === 0 + && value.getUTCMilliseconds() === 0; + return isUtcDateOnly + ? typedLiteral(value.toISOString().slice(0, 10), XSD_DATE) + : typedLiteral(value.toISOString(), XSD_DATE_TIME); + } + if (typeof value === 'number') { + if (!Number.isFinite(value)) return null; + return Number.isInteger(value) + ? 
typedLiteral(String(value), XSD_INTEGER) + : typedLiteral(String(value), XSD_DECIMAL); + } + if (typeof value === 'boolean') { + return typedLiteral(value ? 'true' : 'false', XSD_BOOLEAN); + } + return null; +} + +/** Extract wikilinks `[[Target]]` or `[[Target|Alt]]` โ†’ IRIs using the `urn:dkg:md:` namespace. */ +function extractWikilinks(body: string): string[] { + const out = new Set(); + const noFences = stripCodeFences(body); + const re = /\[\[([^\]|#]+?)(?:#[^\]|]*)?(?:\|[^\]]*?)?\]\]/g; + let m: RegExpExecArray | null; + while ((m = re.exec(noFences)) !== null) { + const target = m[1].trim(); + if (target.length === 0) continue; + out.add(`urn:dkg:md:${slugify(target)}`); + } + return [...out]; +} + +/** + * Extract hashtags `#tag` from the body. Excludes markdown headings + * (lines starting with `#` followed by a space) and code fence contents. + */ +function extractHashtags(body: string): string[] { + const out = new Set(); + const noFences = stripCodeFences(body); + const noHeadings = noFences.replace(/^#{1,6}\s+.*$/gm, ''); + // Match `#word` where word is alphanumeric + `_`/`-`/`/`, not preceded by `[` + // (to avoid `[#heading]` anchors) and not followed by more `#`. + const re = /(?:^|[^\w#[/])#([a-zA-Z][\w-/]*)/g; + let m: RegExpExecArray | null; + while ((m = re.exec(noHeadings)) !== null) { + out.add(m[1]); + } + return [...out]; +} + +/** + * Extract Dataview inline fields: `key:: value` anywhere in a visible line. + * Returns key-value pairs with raw string values; the caller translates to triples. + */ +function extractDataviewFields(body: string): Array<{ key: string; value: string }> { + const out: Array<{ key: string; value: string }> = []; + const noFences = stripCodeFences(body); + for (const line of noFences.split(/\r?\n/)) { + const re = /(?:^|[^\w])([a-zA-Z][\w-]*)::\s*(.+?)(?=(?:\s+[a-zA-Z][\w-]*::)|$)/g; + let m: RegExpExecArray | null; + while ((m = re.exec(line)) !== null) { + out.push({ key: m[1], value: m[2].trim() }); + } + } + return out; +} + +/** Extract section headings (H1..H6) as an ordered list with levels. */ +function extractHeadings(body: string): Array<{ level: number; text: string }> { + const noFences = stripCodeFences(body); + const out: Array<{ level: number; text: string }> = []; + const re = /^(#{1,6})\s+(.+?)\s*#*\s*$/gm; + let m: RegExpExecArray | null; + while ((m = re.exec(noFences)) !== null) { + out.push({ level: m[1].length, text: m[2].trim() }); + } + return out; +} + +/** Strip ``` fenced code blocks (and ~~~ variants) from the markdown. */ +function stripCodeFences(body: string): string { + const lines = body.split(/\r?\n/); + const keptLines: string[] = []; + let activeFence: { char: '`' | '~'; length: number } | null = null; + + for (const line of lines) { + const trimmedEnd = line.trimEnd(); + const fenceMatch = trimmedEnd.match(/^ {0,3}(([`~])\2{2,})(.*)$/); + + if (!activeFence) { + if (fenceMatch) { + activeFence = { + char: fenceMatch[2] as '`' | '~', + length: fenceMatch[1].length, + }; + continue; + } + keptLines.push(line); + continue; + } + + if ( + fenceMatch + && fenceMatch[2] === activeFence.char + && fenceMatch[1].length >= activeFence.length + && fenceMatch[3].trim().length === 0 + ) { + activeFence = null; + } + } + + return keptLines.join('\n'); +} + +/** + * Run the full Phase 2 structural extraction. Deterministic, no LLM. + * Returns `{ triples, provenance, subjectIri }`. 
Empty arrays are valid + * โ€” a Markdown document with no frontmatter, no wikilinks, no tags, no + * dataview fields, and no headings produces zero triples. + */ +export function extractFromMarkdown(input: MarkdownExtractInput): MarkdownExtractOutput { + const triples: Quad[] = []; + const now = input.now ?? new Date(); + + const { frontmatter, body } = splitFrontmatter(input.markdown); + const subject = resolveSubjectIri(input, frontmatter, body); + + // โ”€โ”€ 1. YAML frontmatter โ†’ properties โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + if (frontmatter) { + for (const [key, value] of Object.entries(frontmatter)) { + if (key === 'id') continue; // already used as subject identifier + if (key === 'type') { + const typeIri = resolveTypeIri(value); + if (typeIri) triples.push({ subject, predicate: RDF_TYPE, object: typeIri }); + continue; + } + // Array values emit one triple per element. + const values = Array.isArray(value) ? value : [value]; + for (const v of values) { + const obj = resolveFrontmatterValue(v); + if (obj === null) continue; + const predicate = frontmatterKeyToPredicate(key); + if (predicate === null) continue; + triples.push({ subject, predicate, object: obj }); + } + } + } + + // Promote first H1 โ†’ schema:name if no explicit name triple exists. + const h1 = findFirstH1(body); + if (h1 && !triples.some(q => q.predicate === SCHEMA_NAME)) { + triples.push({ subject, predicate: SCHEMA_NAME, object: JSON.stringify(h1) }); + } + + // โ”€โ”€ 2. Wikilinks โ†’ schema:mentions โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + for (const target of extractWikilinks(body)) { + triples.push({ subject, predicate: SCHEMA_MENTIONS, object: target }); + } + + // โ”€โ”€ 3. Hashtags โ†’ schema:keywords โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + for (const tag of extractHashtags(body)) { + triples.push({ subject, predicate: SCHEMA_KEYWORDS, object: JSON.stringify(tag) }); + } + + // โ”€โ”€ 4. Dataview inline fields โ†’ properties โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + for (const { key, value } of extractDataviewFields(body)) { + const predicate = frontmatterKeyToPredicate(key); + if (predicate === null) continue; + const obj = /^(https?:|did:|urn:)/.test(value) ? value : JSON.stringify(value); + triples.push({ subject, predicate, object: obj }); + } + + // โ”€โ”€ 5. Headings โ†’ dkg:hasSection โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + let sectionIndex = 0; + const sectionStack: Array<{ level: number; iri: string }> = []; + for (const heading of extractHeadings(body)) { + if (heading.level === 1) continue; // H1 is the document title, not a section + sectionIndex += 1; + const sectionIri = `${subject}#section-${sectionIndex}-${slugify(heading.text)}`; + while (sectionStack.length > 0 && sectionStack[sectionStack.length - 1]!.level >= heading.level) { + sectionStack.pop(); + } + const parentSection = sectionStack.length > 0 + ? 
sectionStack[sectionStack.length - 1]!.iri + : subject; + triples.push({ subject: parentSection, predicate: DKG_HAS_SECTION, object: sectionIri }); + triples.push({ subject: sectionIri, predicate: SCHEMA_NAME, object: JSON.stringify(heading.text) }); + sectionStack.push({ level: heading.level, iri: sectionIri }); + } + + // โ”€โ”€ Provenance โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + const provenance = buildProvenance({ + subject, + agentDid: input.agentDid, + tripleCount: triples.length, + now, + }); + + return { triples, provenance, subjectIri: subject }; +} + +function frontmatterKeyToPredicate(key: string): string | null { + if (key === 'name' || key === 'title') return SCHEMA_NAME; + if (key === 'description' || key === 'summary') return SCHEMA_DESCRIPTION; + if (key === 'keywords' || key === 'tags') return SCHEMA_KEYWORDS; + // Unknown keys fall back into the schema.org namespace (same convention as `type`). + const localName = normalizeSchemaLocalName(key, 'property'); + return localName ? `http://schema.org/${localName}` : null; +} + +function buildProvenance(args: { + subject: string; + agentDid: string; + tripleCount: number; + now: Date; +}): Quad[] { + if (args.tripleCount === 0) return []; + const provIri = `urn:dkg:extraction:${slugify(args.subject)}-${args.now.getTime()}`; + const xsdDateTime = `"${args.now.toISOString()}"^^<${XSD_DATE_TIME}>`; + return [ + { subject: provIri, predicate: RDF_TYPE, object: DKG_EXTRACTION_PROVENANCE }, + { subject: provIri, predicate: DKG_EXTRACTED_BY, object: args.agentDid }, + { subject: provIri, predicate: DKG_EXTRACTION_RULE, object: JSON.stringify('markdown-structural-v1') }, + { subject: provIri, predicate: DKG_EXTRACTED_AT, object: xsdDateTime }, + { subject: provIri, predicate: DKG_DERIVED_FROM, object: args.subject }, + { subject: args.subject, predicate: PROV_WAS_GENERATED_BY, object: provIri }, + ]; +} diff --git a/packages/cli/src/extraction/markitdown-converter.ts b/packages/cli/src/extraction/markitdown-converter.ts index 1ccb15616..fa86ad5e8 100644 --- a/packages/cli/src/extraction/markitdown-converter.ts +++ b/packages/cli/src/extraction/markitdown-converter.ts @@ -13,7 +13,7 @@ import { existsSync } from 'node:fs'; import { resolve, join } from 'node:path'; import { platform, arch } from 'node:process'; import { fileURLToPath } from 'node:url'; -import type { ExtractionPipeline, ExtractionInput, ExtractionOutput } from '@origintrail-official/dkg-core'; +import type { ExtractionPipeline, ExtractionInput, ConverterOutput } from '@origintrail-official/dkg-core'; const MAX_OUTPUT_BYTES = 50 * 1024 * 1024; // 50 MB @@ -83,16 +83,8 @@ export const MARKITDOWN_CONTENT_TYPES = [ export class MarkItDownConverter implements ExtractionPipeline { readonly contentTypes = [...MARKITDOWN_CONTENT_TYPES]; - async extract(input: ExtractionInput): Promise { + async extract(input: ExtractionInput): Promise { const markdown = await runMarkItDown(input.filePath); - - // Phase 2 (markdown โ†’ triples) is handled by the Markdown extraction pipeline - // which runs separately. This converter only does phase 1: file โ†’ Markdown. - // Return the intermediate with empty triples; the caller chains the MD pipeline. 
- return { - mdIntermediate: markdown, - triples: [], - provenance: [], - }; + return { mdIntermediate: markdown }; } } diff --git a/packages/cli/src/file-store.ts b/packages/cli/src/file-store.ts new file mode 100644 index 000000000..ee70b0689 --- /dev/null +++ b/packages/cli/src/file-store.ts @@ -0,0 +1,119 @@ +/** + * Content-addressed file store for uploaded files. + * + * Files are stored on disk keyed by their sha256 hash. Two-level sharded + * directory layout (`ab/cdef...`) keeps any single directory at a reasonable + * size even after many uploads. + * + * Used by the import-file route handler to persist originals and Markdown + * intermediates produced by converters. File identity is the content hash + * returned by `put()`, which callers surface as `fileHash` and + * `mdIntermediateHash` in the import-file response. + * + * Spec: 05_PROTOCOL_EXTENSIONS.md ยง6.5 + */ + +import { createHash } from 'node:crypto'; +import { mkdir, readFile, rename, stat, unlink, writeFile } from 'node:fs/promises'; +import { existsSync } from 'node:fs'; +import { join, resolve } from 'node:path'; + +export interface FileStoreEntry { + /** sha256 hash of the file contents, formatted as `sha256:`. */ + hash: string; + /** Absolute path to the stored file on disk. */ + path: string; + /** Size of the file in bytes. */ + size: number; + /** MIME content type recorded at put() time. */ + contentType: string; +} + +export class FileStore { + private readonly rootDir: string; + + constructor(rootDir: string) { + this.rootDir = resolve(rootDir); + } + + /** + * Persist `bytes` to the store and return the resulting entry. Idempotent: + * re-putting the same bytes returns the same hash without rewriting the + * existing blob. The `contentType` metadata is + * attached to the return value but not persisted to disk โ€” callers that + * need durable content-type metadata should store it separately (e.g. in + * an `_meta` triple keyed by hash). + */ + async put(bytes: Buffer, contentType: string): Promise { + const hex = createHash('sha256').update(bytes).digest('hex'); + const hash = `sha256:${hex}`; + const path = this.resolvePath(hex); + await mkdir(join(this.rootDir, hex.slice(0, 2)), { recursive: true }); + if (!existsSync(path)) { + const tempPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2)}`; + try { + await writeFile(tempPath, bytes, { flag: 'wx' }); + try { + await rename(tempPath, path); + } catch (err: any) { + if (!existsSync(path)) { + throw err; + } + } + } finally { + if (existsSync(tempPath)) { + await unlink(tempPath).catch(() => {}); + } + } + } + return { hash, path, size: bytes.length, contentType }; + } + + /** Retrieve the raw bytes for a previously-stored hash, or null if absent. */ + async get(hash: string): Promise { + const path = this.hashToPath(hash); + if (!path) return null; + if (!existsSync(path)) return null; + return readFile(path); + } + + /** Check whether a hash is present in the store. */ + async has(hash: string): Promise { + const path = this.hashToPath(hash); + if (!path) return false; + try { + await stat(path); + return true; + } catch { + return false; + } + } + + /** Resolve a hash to its on-disk path, or null for malformed hashes. */ + hashToPath(hash: string): string | null { + const hex = normalizeHash(hash); + if (!hex) return null; + return this.resolvePath(hex); + } + + /** Root directory the store writes into. 
*/ + get directory(): string { + return this.rootDir; + } + + private resolvePath(hex: string): string { + return join(this.rootDir, hex.slice(0, 2), hex.slice(2)); + } +} + +/** + * Normalize a hash string to its 64-char hex form. Accepts either the + * prefixed (`sha256:abcd...`) or bare (`abcd...`) variants. Returns null for + * anything that isn't a valid sha256 hex. + */ +function normalizeHash(hash: string): string | null { + if (typeof hash !== 'string') return null; + const hex = hash.startsWith('sha256:') ? hash.slice('sha256:'.length) : hash; + if (!/^[0-9a-f]{64}$/i.test(hex)) return null; + return hex.toLowerCase(); +} diff --git a/packages/cli/src/http/multipart.ts b/packages/cli/src/http/multipart.ts new file mode 100644 index 000000000..104415419 --- /dev/null +++ b/packages/cli/src/http/multipart.ts @@ -0,0 +1,179 @@ +/** + * Minimal `multipart/form-data` parser (RFC 7578 / RFC 2046). + * + * Handles the subset needed by the import-file upload endpoint: + * - A single file part with `Content-Disposition: form-data; name="file"; filename="..."` + * and an optional `Content-Type` header. The part body is captured as raw bytes. + * - Zero or more text parts with `Content-Disposition: form-data; name="..."` and a + * utf-8 string body. + * + * Deliberate non-features (out of scope for V10.0): + * - Nested multipart bodies (`multipart/mixed` inside a part) + * - `Content-Transfer-Encoding: base64` / `quoted-printable` (browsers don't send these) + * - Streaming โ€” we parse a fully-buffered `Buffer`, which is the shape daemon.ts + * already has from `readBody` + * - Charset negotiation on text parts โ€” everything non-file is treated as utf-8 + * + * Throws `MultipartParseError` on malformed input so the route handler can + * return a clean 400 to the caller. + */ + +export class MultipartParseError extends Error { + constructor(message: string) { + super(message); + this.name = 'MultipartParseError'; + } +} + +export interface MultipartField { + /** `name` attribute from the `Content-Disposition` header. */ + name: string; + /** `filename` attribute, if the part is a file upload. Undefined for text parts. */ + filename?: string; + /** `Content-Type` header of the part, or undefined if not provided. */ + contentType?: string; + /** Raw part body as bytes. For text parts, caller can decode via `.toString('utf-8')`. */ + content: Buffer; +} + +/** + * Extract the boundary token from a `Content-Type: multipart/form-data; boundary=...` header. + * Returns null if the header is missing, malformed, ambiguous, or not multipart/form-data. + * + * Accepts the full `IncomingHttpHeaders['content-type']` shape (`string | string[] | undefined`) + * so that callers can pass `req.headers['content-type']` directly. Array values โ€” which Node + * can deliver when a client sends duplicated Content-Type headers โ€” are rejected as ambiguous + * rather than coerced, so the route handler returns a clean 400 instead of crashing inside + * `.toLowerCase()`. + */ +export function parseBoundary(contentTypeHeader: string | string[] | undefined): string | null { + if (contentTypeHeader === undefined) return null; + if (Array.isArray(contentTypeHeader)) return null; + const lower = contentTypeHeader.toLowerCase(); + if (!lower.startsWith('multipart/form-data')) return null; + const match = contentTypeHeader.match(/boundary\s*=\s*(?:"([^"]+)"|([^\s;]+))/i); + if (!match) return null; + return match[1] ?? match[2] ?? 
null; +} + +/** + * Parse a fully-buffered `multipart/form-data` body into its constituent fields. + * `boundary` is the boundary token (without the leading `--`). + */ +export function parseMultipart(body: Buffer, boundary: string): MultipartField[] { + if (!boundary || boundary.length === 0) { + throw new MultipartParseError('Empty boundary'); + } + const delimiter = Buffer.from(`--${boundary}`); + const encapsulatedDelimiter = Buffer.from(`\r\n--${boundary}`); + const crlf = Buffer.from('\r\n'); + const doubleCrlf = Buffer.from('\r\n\r\n'); + + // Find first delimiter. Spec allows CRLF or just the delimiter at the start. + let cursor = body.indexOf(delimiter); + if (cursor < 0) { + throw new MultipartParseError('Missing opening boundary'); + } + + const fields: MultipartField[] = []; + const maxIterations = 1000; + let iterations = 0; + + while (cursor < body.length) { + if (++iterations > maxIterations) { + throw new MultipartParseError('Too many parts (>1000)'); + } + // Move past the boundary delimiter + cursor += delimiter.length; + // Check for closing `--` (final boundary) + if (cursor + 2 <= body.length && body[cursor] === 0x2d && body[cursor + 1] === 0x2d) { + return fields; + } + // Skip trailing CRLF after delimiter + if (cursor + 2 <= body.length && body[cursor] === 0x0d && body[cursor + 1] === 0x0a) { + cursor += 2; + } else { + throw new MultipartParseError('Malformed boundary: expected CRLF after delimiter'); + } + // Find end-of-headers (\r\n\r\n) + const headerEnd = body.indexOf(doubleCrlf, cursor); + if (headerEnd < 0) { + throw new MultipartParseError('Malformed part: no header terminator'); + } + const headerBytes = body.subarray(cursor, headerEnd); + const headers = parseHeaders(headerBytes); + const contentStart = headerEnd + doubleCrlf.length; + + // Find the next real multipart boundary. Per RFC 2046, encapsulated boundaries + // must start on a new line, so raw `--${boundary}` bytes inside the payload do + // not count unless they are preceded by CRLF. + const nextBoundary = findNextBoundary(body, encapsulatedDelimiter, contentStart); + if (nextBoundary < 0) { + throw new MultipartParseError('Malformed part: no closing boundary'); + } + const nextDelimiter = nextBoundary + crlf.length; + // Part body ends at the CRLF that introduces the next boundary. + const contentEnd = nextBoundary; + const content = body.subarray(contentStart, contentEnd); + + const disposition = headers.get('content-disposition'); + if (!disposition) { + throw new MultipartParseError('Malformed part: missing Content-Disposition'); + } + // Anchor parameter matches to a real `;` boundary (or start of string) so + // `name=` doesn't accidentally match the `name=` substring inside `filename=`, + // and vice versa. Without this, a part with only `filename="x"` (no `name`) + // would be silently mis-routed as `name="x"`. + const nameMatch = disposition.match(/(?:^|;)\s*name\s*=\s*(?:"([^"]*)"|([^;]+))/i); + if (!nameMatch) { + throw new MultipartParseError('Malformed part: Content-Disposition without name'); + } + const filenameMatch = disposition.match(/(?:^|;)\s*filename\s*=\s*(?:"([^"]*)"|([^;]+))/i); + fields.push({ + name: (nameMatch[1] ?? nameMatch[2] ?? '').trim(), + filename: filenameMatch ? (filenameMatch[1] ?? filenameMatch[2] ?? 
'').trim() : undefined, + contentType: headers.get('content-type'), + content: Buffer.from(content), + }); + + cursor = nextDelimiter; + } + + throw new MultipartParseError('Unexpected end of body'); +} + +function findNextBoundary(body: Buffer, encapsulatedDelimiter: Buffer, start: number): number { + let candidate = body.indexOf(encapsulatedDelimiter, start); + while (candidate >= 0) { + const boundaryEnd = candidate + encapsulatedDelimiter.length; + const nextFirstByte = body[boundaryEnd]; + const nextSecondByte = body[boundaryEnd + 1]; + const isBoundaryTerminator = + (nextFirstByte === 0x0d && nextSecondByte === 0x0a) + || (nextFirstByte === 0x2d && nextSecondByte === 0x2d); + if (isBoundaryTerminator) { + return candidate; + } + candidate = body.indexOf(encapsulatedDelimiter, candidate + 1); + } + return -1; +} + +/** + * Parse a raw header block (CRLF-delimited) into a lower-cased key โ†’ value map. + * Multi-line folded headers are not supported (RFC 7578 ยง5.3 says field names + * in multipart/form-data must use the simpler RFC 2183 header format). + */ +function parseHeaders(block: Buffer): Map { + const headers = new Map(); + const text = block.toString('utf-8'); + for (const line of text.split(/\r?\n/)) { + if (line.length === 0) continue; + const colonIdx = line.indexOf(':'); + if (colonIdx < 0) continue; + const name = line.slice(0, colonIdx).trim().toLowerCase(); + const value = line.slice(colonIdx + 1).trim(); + headers.set(name, value); + } + return headers; +} diff --git a/packages/cli/test/document-processor-e2e.test.ts b/packages/cli/test/document-processor-e2e.test.ts index 551c89d35..f1c721866 100644 --- a/packages/cli/test/document-processor-e2e.test.ts +++ b/packages/cli/test/document-processor-e2e.test.ts @@ -13,7 +13,7 @@ import { ExtractionPipelineRegistry, type ExtractionPipeline, type ExtractionInput, - type ExtractionOutput, + type ConverterOutput, } from '@origintrail-official/dkg-core'; import { MarkItDownConverter, isMarkItDownAvailable } from '../src/extraction/index.js'; @@ -59,9 +59,9 @@ describe('ExtractionPipelineRegistry E2E', () => { const customMdPipeline: ExtractionPipeline = { contentTypes: ['text/markdown'], - async extract(input: ExtractionInput): Promise { + async extract(input: ExtractionInput): Promise { const md = await readFile(input.filePath, 'utf-8'); - return { mdIntermediate: md, triples: [], provenance: [] }; + return { mdIntermediate: md }; }, }; @@ -113,8 +113,6 @@ describe.skipIf(!markitdownAvailable)('MarkItDown E2E โ€” real file conversion', expect(result.mdIntermediate).toBeTruthy(); expect(result.mdIntermediate).toContain('Research Paper'); expect(result.mdIntermediate).toContain('decentralized knowledge graphs'); - expect(result.triples).toEqual([]); - expect(result.provenance).toEqual([]); }); it('converts a CSV file to Markdown', async () => { @@ -144,7 +142,6 @@ describe.skipIf(!markitdownAvailable)('MarkItDown E2E โ€” real file conversion', }); expect(typeof result.mdIntermediate).toBe('string'); - expect(result.triples).toEqual([]); }); it('processes file through registry lookup โ†’ extract', async () => { @@ -207,7 +204,7 @@ describe('Full extraction pipeline simulation', () => { contentTypes: ['text/markdown'], async extract(input) { const md = await readFile(input.filePath, 'utf-8'); - return { mdIntermediate: md, triples: [], provenance: [] }; + return { mdIntermediate: md }; }, }; @@ -277,15 +274,13 @@ describe('Full extraction pipeline simulation', () => { const registry = new ExtractionPipelineRegistry(); - // 
Register a mock HTML pipeline + // Register a mock HTML pipeline (Phase 1 converter โ€” mdIntermediate only) registry.register({ contentTypes: ['text/html'], async extract(input) { const content = await readFile(input.filePath, 'utf-8'); return { mdIntermediate: content.replace(/<[^>]+>/g, ''), - triples: [{ subject: 'urn:sales:q4', predicate: 'rdf:type', object: 'schema:Report' }], - provenance: [], }; }, }); @@ -299,19 +294,24 @@ describe('Full extraction pipeline simulation', () => { agentDid: 'did:dkg:agent:0xSales', }); + // Phase 2 (simulated): the route handler would run the Markdown extractor + // on `result.mdIntermediate` to produce triples/provenance. + const phase2Triples = [{ subject: 'urn:sales:q4', predicate: 'rdf:type', object: 'schema:Report' }]; + // Build the import-file response as the daemon would const importFileResponse = { assertionUri: 'did:dkg:context-graph:sales/assertion/0xSales/q4-report', fileHash: 'sha256:abc123', detectedContentType: 'text/html', extraction: { - status: result.triples.length > 0 ? 'completed' as const : 'skipped' as const, - tripleCount: result.triples.length, + status: phase2Triples.length > 0 ? 'completed' as const : 'skipped' as const, + tripleCount: phase2Triples.length, mdIntermediateHash: 'sha256:def456', pipelineUsed: 'text/html', }, }; + expect(result.mdIntermediate).toContain('Q4 Sales'); expect(importFileResponse.extraction.status).toBe('completed'); expect(importFileResponse.extraction.tripleCount).toBe(1); expect(importFileResponse.extraction.pipelineUsed).toBe('text/html'); diff --git a/packages/cli/test/extraction-markdown.test.ts b/packages/cli/test/extraction-markdown.test.ts new file mode 100644 index 000000000..863c67792 --- /dev/null +++ b/packages/cli/test/extraction-markdown.test.ts @@ -0,0 +1,562 @@ +import { describe, it, expect } from 'vitest'; +import { extractFromMarkdown } from '../src/extraction/markdown-extractor.js'; + +const AGENT = 'did:dkg:agent:0xAbC123'; +const FIXED_NOW = new Date('2026-04-10T12:00:00Z'); + +const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'; +const SCHEMA_NAME = 'http://schema.org/name'; +const SCHEMA_DESCRIPTION = 'http://schema.org/description'; +const SCHEMA_MENTIONS = 'http://schema.org/mentions'; +const SCHEMA_KEYWORDS = 'http://schema.org/keywords'; +const DKG_HAS_SECTION = 'http://dkg.io/ontology/hasSection'; +const DKG_EXTRACTION_PROVENANCE = 'http://dkg.io/ontology/ExtractionProvenance'; +const PROV_WAS_GENERATED_BY = 'http://www.w3.org/ns/prov#wasGeneratedBy'; +const XSD_BOOLEAN = 'http://www.w3.org/2001/XMLSchema#boolean'; +const XSD_DATE = 'http://www.w3.org/2001/XMLSchema#date'; +const XSD_DATE_TIME = 'http://www.w3.org/2001/XMLSchema#dateTime'; +const XSD_DECIMAL = 'http://www.w3.org/2001/XMLSchema#decimal'; +const XSD_INTEGER = 'http://www.w3.org/2001/XMLSchema#integer'; + +describe('extractFromMarkdown โ€” frontmatter', () => { + it('extracts rdf:type from frontmatter `type` key (schema.org convention)', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `---\nid: climate-report-2026\ntype: Report\n---\n\n# Climate Report\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('urn:dkg:md:climate-report-2026'); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: RDF_TYPE, + object: 'http://schema.org/Report', + }); + }); + + it('extracts full IRI `type` without namespacing', () => { + const { triples } = extractFromMarkdown({ + markdown: `---\nid: x\ntype: 
https://example.org/ontology/Thing\n---\n\n# X\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples.some(t => t.predicate === RDF_TYPE && t.object === 'https://example.org/ontology/Thing')).toBe(true); + }); + + it('maps `title` to schema:name and `description` to schema:description', () => { + const { triples } = extractFromMarkdown({ + markdown: `---\nid: doc-1\ntitle: Hello World\ndescription: A short doc\n---\n\nBody.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ subject: 'urn:dkg:md:doc-1', predicate: SCHEMA_NAME, object: '"Hello World"' }); + expect(triples).toContainEqual({ subject: 'urn:dkg:md:doc-1', predicate: SCHEMA_DESCRIPTION, object: '"A short doc"' }); + }); + + it('normalizes unsafe frontmatter keys and bare type values into safe schema IRIs', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `---\nid: doc-1\ntype: Research Report\nrelease date: 2026-04-10\nauthor(s): Alice\n---\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: RDF_TYPE, + object: 'http://schema.org/ResearchReport', + }); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/releaseDate', + object: `"2026-04-10"^^<${XSD_DATE}>`, + }); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/authors', + object: '"Alice"', + }); + }); + + it('emits one triple per element for array values in frontmatter', () => { + const { triples } = extractFromMarkdown({ + markdown: `---\nid: doc\nauthors:\n - Alice\n - Bob\n---\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const authors = triples.filter(t => t.predicate === 'http://schema.org/authors'); + expect(authors.map(t => t.object).sort()).toEqual(['"Alice"', '"Bob"']); + }); + + it('emits typed literals for numeric and boolean YAML scalars', () => { + const { triples } = extractFromMarkdown({ + markdown: `---\nid: doc\npageCount: 42\nscore: 3.14\npublished: true\n---\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ + subject: 'urn:dkg:md:doc', + predicate: 'http://schema.org/pageCount', + object: `"42"^^<${XSD_INTEGER}>`, + }); + expect(triples).toContainEqual({ + subject: 'urn:dkg:md:doc', + predicate: 'http://schema.org/score', + object: `"3.14"^^<${XSD_DECIMAL}>`, + }); + expect(triples).toContainEqual({ + subject: 'urn:dkg:md:doc', + predicate: 'http://schema.org/published', + object: `"true"^^<${XSD_BOOLEAN}>`, + }); + }); + + it('emits xsd:dateTime for YAML timestamps with a time component', () => { + const { triples } = extractFromMarkdown({ + markdown: `---\nid: doc\nupdatedAt: 2026-04-10T15:45:30Z\n---\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ + subject: 'urn:dkg:md:doc', + predicate: 'http://schema.org/updatedAt', + object: `"2026-04-10T15:45:30.000Z"^^<${XSD_DATE_TIME}>`, + }); + }); + + it('ignores frontmatter with invalid YAML (fallthrough to body)', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `---\nid: {broken yaml\n---\n\n# Fallback\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + // Subject should derive from the H1 because frontmatter is rejected + expect(subjectIri).toBe('urn:dkg:md:fallback'); + expect(triples).toContainEqual({ subject: subjectIri, predicate: SCHEMA_NAME, object: '"Fallback"' }); + }); +}); + +describe('extractFromMarkdown โ€” wikilinks', () => { + it('extracts bare wikilinks', () => { + const { triples, subjectIri } = 
extractFromMarkdown({ + markdown: `# Doc\n\nSee [[Alice]] and [[Bob]] for details.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: SCHEMA_MENTIONS, object: 'urn:dkg:md:alice' }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: SCHEMA_MENTIONS, object: 'urn:dkg:md:bob' }); + }); + + it('extracts piped wikilinks `[[Target|alt]]`', () => { + const { triples } = extractFromMarkdown({ + markdown: `# Doc\n\nSee [[Charlie Chocolate|Charlie]].\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples.some(t => t.predicate === SCHEMA_MENTIONS && t.object === 'urn:dkg:md:charlie-chocolate')).toBe(true); + }); + + it('deduplicates wikilinks', () => { + const { triples } = extractFromMarkdown({ + markdown: `# Doc\n\n[[Alice]] [[Alice]] [[Alice]]\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const mentions = triples.filter(t => t.predicate === SCHEMA_MENTIONS); + expect(mentions).toHaveLength(1); + }); + + it('ignores wikilinks inside code fences and derives H1 from visible markdown only', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `\`\`\`md\n# Hidden Title\n[[Hidden Target]]\n\`\`\`\n\n# Visible Title\n\nSee [[Visible Target]].\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('urn:dkg:md:visible-title'); + const mentions = triples.filter(t => t.predicate === SCHEMA_MENTIONS).map(t => t.object); + expect(mentions).toEqual(['urn:dkg:md:visible-target']); + }); + + it('ignores variable-length info-string fences across structural extraction passes', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `\`\`\`\`md\n# Hidden Title\n[[Hidden Target]]\n#hidden\nfield:: hidden\n\`\`\`\`\n\n# Visible Title\n\n[[Visible Target]] #visible\nfield:: shown\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('urn:dkg:md:visible-title'); + expect(triples.filter(t => t.predicate === SCHEMA_MENTIONS).map(t => t.object)).toEqual([ + 'urn:dkg:md:visible-target', + ]); + expect(triples.filter(t => t.predicate === SCHEMA_KEYWORDS).map(t => t.object)).toEqual([ + '"visible"', + ]); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/field', + object: '"shown"', + }); + expect(triples).not.toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/field', + object: '"hidden"', + }); + }); + + it('ignores fences indented by up to three spaces', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: ` \`\`\`md\n # Hidden Title\n [[Hidden Target]]\n #hidden\n field:: hidden\n \`\`\`\n\n# Visible Title\n\n[[Visible Target]] #visible\nfield:: shown\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('urn:dkg:md:visible-title'); + expect(triples.filter(t => t.predicate === SCHEMA_MENTIONS).map(t => t.object)).toEqual([ + 'urn:dkg:md:visible-target', + ]); + expect(triples.filter(t => t.predicate === SCHEMA_KEYWORDS).map(t => t.object)).toEqual([ + '"visible"', + ]); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/field', + object: '"shown"', + }); + expect(triples).not.toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/field', + object: '"hidden"', + }); + }); +}); + +describe('extractFromMarkdown โ€” hashtags', () => { + it('extracts hashtags as schema:keywords', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# Doc\n\nSome text #climate #policy and 
more.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: SCHEMA_KEYWORDS, object: '"climate"' }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: SCHEMA_KEYWORDS, object: '"policy"' }); + }); + + it('does not treat markdown headings as hashtags', () => { + const { triples } = extractFromMarkdown({ + markdown: `# Title\n\n## Section\n\nBody without tags.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const keywords = triples.filter(t => t.predicate === SCHEMA_KEYWORDS); + expect(keywords).toHaveLength(0); + }); + + it('ignores hashtags inside code fences', () => { + const { triples } = extractFromMarkdown({ + markdown: `# Doc\n\n\`\`\`bash\n# a comment #notatag\n\`\`\`\n\nBody #realtag here.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const keywords = triples.filter(t => t.predicate === SCHEMA_KEYWORDS).map(t => t.object); + expect(keywords).toContain('"realtag"'); + expect(keywords).not.toContain('"notatag"'); + expect(keywords).not.toContain('"a"'); + }); +}); + +describe('extractFromMarkdown โ€” Dataview inline fields', () => { + it('extracts `key:: value` lines', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# Doc\n\nauthor:: Alice\nstatus:: draft\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: 'http://schema.org/author', object: '"Alice"' }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: 'http://schema.org/status', object: '"draft"' }); + }); + + it('extracts inline `key:: value` fields embedded in prose', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# Doc\n\nSentence with status:: draft\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: 'http://schema.org/status', + object: '"draft"', + }); + }); + + it('preserves IRI values as IRIs (not literals)', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# Doc\n\nhomepage:: https://example.org/home\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: 'http://schema.org/homepage', object: 'https://example.org/home' }); + }); + + it('ignores dataview-like syntax inside code fences', () => { + const { triples } = extractFromMarkdown({ + markdown: `# Doc\n\n\`\`\`\nfake:: not a field\n\`\`\`\n\nreal:: value\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const dataview = triples.filter(t => t.predicate.startsWith('http://schema.org/')); + expect(dataview.some(t => t.predicate === 'http://schema.org/real')).toBe(true); + expect(dataview.some(t => t.predicate === 'http://schema.org/fake')).toBe(false); + }); +}); + +describe('extractFromMarkdown โ€” headings', () => { + it('preserves heading nesting by attaching deeper headings to their nearest parent section', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# Title\n\n## Intro\n\n## Methods\n\n### Sub-method\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const rootSections = triples.filter(t => t.subject === subjectIri && t.predicate === DKG_HAS_SECTION); + expect(rootSections).toHaveLength(2); + expect(rootSections.map(t => t.object)).toEqual([ + `${subjectIri}#section-1-intro`, + `${subjectIri}#section-2-methods`, + ]); + expect(triples).toContainEqual({ + subject: `${subjectIri}#section-2-methods`, + predicate: DKG_HAS_SECTION, + object: `${subjectIri}#section-3-sub-method`, 
+ }); + for (const section of [...rootSections, { + subject: `${subjectIri}#section-2-methods`, + predicate: DKG_HAS_SECTION, + object: `${subjectIri}#section-3-sub-method`, + }]) { + expect(triples.some(t => t.subject === section.object && t.predicate === SCHEMA_NAME)).toBe(true); + } + }); + + it('disambiguates repeated headings by prefixing a stable section index', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# Title\n\n## Overview\n\nText.\n\n## Overview\n\nMore text.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const sections = triples.filter(t => t.predicate === DKG_HAS_SECTION).map(t => t.object); + expect(sections).toEqual([ + `${subjectIri}#section-1-overview`, + `${subjectIri}#section-2-overview`, + ]); + }); + + it('H1 promotes to schema:name on the document subject', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# My Document\n\nBody.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: SCHEMA_NAME, object: '"My Document"' }); + }); + + it('H1 does not overwrite an explicit frontmatter title', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `---\nid: x\ntitle: Explicit Title\n---\n\n# Different H1\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const names = triples.filter(t => t.subject === subjectIri && t.predicate === SCHEMA_NAME); + expect(names).toHaveLength(1); + expect(names[0].object).toBe('"Explicit Title"'); + }); +}); + +describe('extractFromMarkdown โ€” subject IRI resolution', () => { + it('prefers explicit documentIri input', () => { + const { subjectIri } = extractFromMarkdown({ + markdown: `---\nid: ignored\n---\n\n# H1 Also Ignored\n`, + agentDid: AGENT, + documentIri: 'did:dkg:context-graph:foo/assertion/0xabc/mydoc', + now: FIXED_NOW, + }); + expect(subjectIri).toBe('did:dkg:context-graph:foo/assertion/0xabc/mydoc'); + }); + + it('uses frontmatter id as-is when it looks like an IRI', () => { + const { subjectIri } = extractFromMarkdown({ + markdown: `---\nid: https://example.org/thing/42\n---\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('https://example.org/thing/42'); + }); + + it('slugifies a frontmatter id that is not an IRI', () => { + const { subjectIri } = extractFromMarkdown({ + markdown: `---\nid: My Great Document!\n---\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('urn:dkg:md:my-great-document'); + }); + + it('falls back to slugified H1 when no id is present', () => { + const { subjectIri } = extractFromMarkdown({ + markdown: `# A Title of Things\n\nBody.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toBe('urn:dkg:md:a-title-of-things'); + }); + + it('uses a hash fallback when non-ASCII titles and headings would slugify to empty strings', () => { + const { triples, subjectIri } = extractFromMarkdown({ + markdown: `# ๆฑไบฌ\n\nSee [[ๅคง้˜ช]].\n\n## ๆ„Ÿๆƒณ\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri).toMatch(/^urn:dkg:md:hash-[0-9a-f]{12}$/); + const mentions = triples.filter(t => t.predicate === SCHEMA_MENTIONS).map(t => t.object); + expect(mentions).toEqual([expect.stringMatching(/^urn:dkg:md:hash-[0-9a-f]{12}$/)]); + const sections = triples.filter(t => t.predicate === DKG_HAS_SECTION).map(t => t.object); + expect(sections).toEqual([expect.stringMatching(new RegExp(`^${subjectIri.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}#section-1-hash-[0-9a-f]{12}$`))]); + }); + + it('produces a stable anonymous 
fallback when there is no title', () => { + const { subjectIri } = extractFromMarkdown({ + markdown: `Just a body. No headings, no frontmatter.\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(subjectIri.startsWith('urn:dkg:md:anonymous-')).toBe(true); + }); + + it('derives anonymous fallback subjects from the full body instead of a shared prefix', () => { + const first = extractFromMarkdown({ + markdown: `Shared prefix line\nBut a different ending A\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + const second = extractFromMarkdown({ + markdown: `Shared prefix line\nBut a different ending B\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(first.subjectIri).not.toBe(second.subjectIri); + expect(first.subjectIri).toMatch(/^urn:dkg:md:anonymous-[0-9a-f]{12}$/); + expect(second.subjectIri).toMatch(/^urn:dkg:md:anonymous-[0-9a-f]{12}$/); + }); +}); + +describe('extractFromMarkdown โ€” provenance', () => { + it('emits a single provenance block when triples are produced', () => { + const { triples, provenance } = extractFromMarkdown({ + markdown: `# Doc\n\n#tag1\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples.length).toBeGreaterThan(0); + expect(provenance.length).toBeGreaterThan(0); + expect(provenance).toContainEqual(expect.objectContaining({ + predicate: RDF_TYPE, + object: DKG_EXTRACTION_PROVENANCE, + })); + // Back-link from subject to provenance + expect(provenance.some(q => q.predicate === PROV_WAS_GENERATED_BY)).toBe(true); + }); + + it('emits no provenance when no triples are extracted', () => { + const { triples, provenance } = extractFromMarkdown({ + markdown: ``, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(triples).toHaveLength(0); + expect(provenance).toHaveLength(0); + }); + + it('records the extracting agent DID in provenance', () => { + const { provenance } = extractFromMarkdown({ + markdown: `# Doc\n\n#tag\n`, + agentDid: AGENT, + now: FIXED_NOW, + }); + expect(provenance.some(q => q.object === AGENT)).toBe(true); + }); +}); + +describe('extractFromMarkdown โ€” end-to-end', () => { + it('handles a full document with frontmatter, H1, tags, wikilinks, dataview, and sections', () => { + const markdown = `--- +id: research-note +type: ScholarlyArticle +title: On Decentralized Knowledge Graphs +description: Exploring DKG fundamentals +authors: + - Alice + - Bob +--- + +# On Decentralized Knowledge Graphs + +status:: draft +topic:: knowledge graphs + +This note discusses [[Decentralized Identifiers]] and [[RDF]] concepts. + +It covers #knowledge-graphs and #dkg topics in depth. + +## Background + +Some background. + +## Methods + +Our method relies on [[SPARQL]] queries. 
+`; + const { triples, provenance, subjectIri } = extractFromMarkdown({ + markdown, + agentDid: AGENT, + now: FIXED_NOW, + }); + + expect(subjectIri).toBe('urn:dkg:md:research-note'); + + // Type + expect(triples).toContainEqual({ + subject: subjectIri, + predicate: RDF_TYPE, + object: 'http://schema.org/ScholarlyArticle', + }); + + // Name from frontmatter title (NOT from H1 since title is set) + expect(triples.filter(t => t.predicate === SCHEMA_NAME && t.subject === subjectIri)).toEqual([ + { subject: subjectIri, predicate: SCHEMA_NAME, object: '"On Decentralized Knowledge Graphs"' }, + ]); + + // Authors + const authors = triples.filter(t => t.predicate === 'http://schema.org/authors').map(t => t.object); + expect(authors).toContain('"Alice"'); + expect(authors).toContain('"Bob"'); + + // Dataview fields + expect(triples).toContainEqual({ subject: subjectIri, predicate: 'http://schema.org/status', object: '"draft"' }); + expect(triples).toContainEqual({ subject: subjectIri, predicate: 'http://schema.org/topic', object: '"knowledge graphs"' }); + + // Wikilinks + const mentions = triples.filter(t => t.predicate === SCHEMA_MENTIONS).map(t => t.object); + expect(mentions).toContain('urn:dkg:md:decentralized-identifiers'); + expect(mentions).toContain('urn:dkg:md:rdf'); + expect(mentions).toContain('urn:dkg:md:sparql'); + + // Tags + const tags = triples.filter(t => t.predicate === SCHEMA_KEYWORDS).map(t => t.object); + expect(tags).toContain('"knowledge-graphs"'); + expect(tags).toContain('"dkg"'); + + // Sections + const sections = triples.filter(t => t.predicate === DKG_HAS_SECTION).map(t => t.object); + expect(sections).toEqual([ + `${subjectIri}#section-1-background`, + `${subjectIri}#section-2-methods`, + ]); + + // Provenance present + expect(provenance.length).toBeGreaterThan(0); + expect(provenance.some(q => q.object === AGENT)).toBe(true); + }); +}); diff --git a/packages/cli/test/extraction-markitdown.test.ts b/packages/cli/test/extraction-markitdown.test.ts index 5da39770d..26b749992 100644 --- a/packages/cli/test/extraction-markitdown.test.ts +++ b/packages/cli/test/extraction-markitdown.test.ts @@ -60,7 +60,7 @@ describe('MarkItDownConverter', () => { expect(converter.contentTypes.length).toBeGreaterThanOrEqual(6); }); - it('extract returns mdIntermediate with empty triples (phase 1 only)', async () => { + it('extract returns ConverterOutput with mdIntermediate only (phase 1)', async () => { const converter = new MarkItDownConverter(); // If markitdown is not available, the extract call should throw @@ -89,9 +89,9 @@ describe('MarkItDownConverter', () => { expect(typeof result.mdIntermediate).toBe('string'); expect(result.mdIntermediate.length).toBeGreaterThan(0); - // Phase 1 only โ€” triples are produced by the Markdown extraction pipeline - expect(result.triples).toEqual([]); - expect(result.provenance).toEqual([]); + // Phase 1 only โ€” converter returns ConverterOutput, no triples/provenance. 
+ expect((result as { triples?: unknown }).triples).toBeUndefined(); + expect((result as { provenance?: unknown }).provenance).toBeUndefined(); } finally { await rm(tmpDir, { recursive: true, force: true }); } diff --git a/packages/cli/test/extraction-status.test.ts b/packages/cli/test/extraction-status.test.ts new file mode 100644 index 000000000..de274b674 --- /dev/null +++ b/packages/cli/test/extraction-status.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from 'vitest'; +import { + MAX_EXTRACTION_STATUS_RECORDS, + pruneExtractionStatusRecords, + type ExtractionStatusRecord, +} from '../src/extraction-status.js'; + +const BASE_MS = Date.UTC(2026, 3, 10, 12, 0, 0); + +function makeRecord(status: ExtractionStatusRecord['status'], index: number): ExtractionStatusRecord { + const startedAt = new Date(BASE_MS + (index * 1000)).toISOString(); + return { + status, + fileHash: `sha256:${index.toString(16).padStart(64, '0')}`, + detectedContentType: 'text/markdown', + pipelineUsed: status === 'skipped' ? null : 'text/markdown', + tripleCount: 0, + startedAt, + ...(status === 'in_progress' ? {} : { completedAt: new Date(BASE_MS + (index * 1000) + 500).toISOString() }), + }; +} + +describe('extraction-status pruning', () => { + it('does not evict in-progress records when only active jobs remain above capacity', () => { + const status = new Map(); + for (let i = 0; i < MAX_EXTRACTION_STATUS_RECORDS + 1; i += 1) { + status.set(`assertion-${i}`, makeRecord('in_progress', i)); + } + + pruneExtractionStatusRecords(status, BASE_MS + ((MAX_EXTRACTION_STATUS_RECORDS + 2) * 1000)); + + expect(status.size).toBe(MAX_EXTRACTION_STATUS_RECORDS + 1); + expect(status.has('assertion-0')).toBe(true); + expect([...status.values()].every(record => record.status === 'in_progress')).toBe(true); + }); + + it('evicts completed records before active ones when capacity is exceeded', () => { + const status = new Map(); + for (let i = 0; i < 5; i += 1) { + status.set(`active-${i}`, makeRecord('in_progress', i)); + } + for (let i = 0; i < MAX_EXTRACTION_STATUS_RECORDS; i += 1) { + status.set(`completed-${i}`, makeRecord('completed', i + 10)); + } + + pruneExtractionStatusRecords(status, BASE_MS + ((MAX_EXTRACTION_STATUS_RECORDS + 20) * 1000)); + + expect(status.size).toBe(MAX_EXTRACTION_STATUS_RECORDS); + for (let i = 0; i < 5; i += 1) { + expect(status.has(`active-${i}`)).toBe(true); + } + }); +}); diff --git a/packages/cli/test/file-store.test.ts b/packages/cli/test/file-store.test.ts new file mode 100644 index 000000000..d7b399c1a --- /dev/null +++ b/packages/cli/test/file-store.test.ts @@ -0,0 +1,155 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtemp, readdir, rm, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { createHash } from 'node:crypto'; +import { FileStore } from '../src/file-store.js'; + +let rootDir: string; + +beforeEach(async () => { + rootDir = await mkdtemp(join(tmpdir(), 'dkg-filestore-test-')); +}); + +afterEach(async () => { + await rm(rootDir, { recursive: true, force: true }); +}); + +describe('FileStore.put', () => { + it('stores bytes and returns a sha256 hash with the sha256: prefix', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('hello world', 'utf-8'); + const expectedHex = createHash('sha256').update(bytes).digest('hex'); + + const entry = await store.put(bytes, 'text/plain'); + + expect(entry.hash).toBe(`sha256:${expectedHex}`); + 
expect(entry.size).toBe(11); + expect(entry.contentType).toBe('text/plain'); + }); + + it('writes content to a two-level sharded path (ab/cdef...)', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('some content', 'utf-8'); + const expectedHex = createHash('sha256').update(bytes).digest('hex'); + + const entry = await store.put(bytes, 'text/plain'); + + const expectedPath = join(rootDir, expectedHex.slice(0, 2), expectedHex.slice(2)); + expect(entry.path).toBe(expectedPath); + const onDisk = await readFile(expectedPath); + expect(onDisk.equals(bytes)).toBe(true); + }); + + it('is idempotent โ€” putting the same bytes twice yields the same hash', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('idempotent', 'utf-8'); + + const first = await store.put(bytes, 'text/plain'); + const second = await store.put(bytes, 'application/octet-stream'); + + expect(first.hash).toBe(second.hash); + expect(first.path).toBe(second.path); + // contentType on the returned entry reflects the caller, not persisted metadata + expect(first.contentType).toBe('text/plain'); + expect(second.contentType).toBe('application/octet-stream'); + }); + + it('leaves only the final blob after repeated puts of the same content', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('atomic-write', 'utf-8'); + + const first = await store.put(bytes, 'text/plain'); + const second = await store.put(bytes, 'text/plain'); + + expect(second.path).toBe(first.path); + const shardEntries = await readdir(join(rootDir, first.hash.slice('sha256:'.length, 'sha256:'.length + 2))); + expect(shardEntries).toEqual([first.hash.slice('sha256:'.length + 2)]); + }); + + it('handles empty input', async () => { + const store = new FileStore(rootDir); + const entry = await store.put(Buffer.alloc(0), 'application/octet-stream'); + expect(entry.size).toBe(0); + // sha256 of empty string is well-known + expect(entry.hash).toBe('sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'); + }); + + it('handles binary content with arbitrary bytes', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from([0x00, 0xff, 0x7f, 0x80, 0x0a, 0x0d]); + const entry = await store.put(bytes, 'application/octet-stream'); + const onDisk = await readFile(entry.path); + expect(onDisk.equals(bytes)).toBe(true); + }); +}); + +describe('FileStore.get', () => { + it('returns the bytes for a stored hash', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('retrievable', 'utf-8'); + const { hash } = await store.put(bytes, 'text/plain'); + + const retrieved = await store.get(hash); + expect(retrieved).not.toBeNull(); + expect(retrieved!.equals(bytes)).toBe(true); + }); + + it('returns null for a hash that was never stored', async () => { + const store = new FileStore(rootDir); + const bogusHex = 'a'.repeat(64); + const retrieved = await store.get(`sha256:${bogusHex}`); + expect(retrieved).toBeNull(); + }); + + it('accepts bare hex or sha256:-prefixed hashes', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('both forms', 'utf-8'); + const { hash } = await store.put(bytes, 'text/plain'); + const bareHex = hash.slice('sha256:'.length); + + const viaPrefixed = await store.get(hash); + const viaBare = await store.get(bareHex); + + expect(viaPrefixed).not.toBeNull(); + expect(viaBare).not.toBeNull(); + expect(viaPrefixed!.equals(viaBare!)).toBe(true); + }); + + it('returns null for 
malformed hash strings', async () => { + const store = new FileStore(rootDir); + expect(await store.get('not-a-hash')).toBeNull(); + expect(await store.get('sha256:tooshort')).toBeNull(); + expect(await store.get('sha256:' + 'z'.repeat(64))).toBeNull(); // non-hex chars + expect(await store.get('')).toBeNull(); + }); +}); + +describe('FileStore.has', () => { + it('returns true for stored hashes and false otherwise', async () => { + const store = new FileStore(rootDir); + const bytes = Buffer.from('presence check', 'utf-8'); + const { hash } = await store.put(bytes, 'text/plain'); + + expect(await store.has(hash)).toBe(true); + expect(await store.has('sha256:' + 'b'.repeat(64))).toBe(false); + expect(await store.has('bad-hash')).toBe(false); + }); +}); + +describe('FileStore.hashToPath', () => { + it('resolves a hash to an absolute sharded path without touching disk', () => { + const store = new FileStore(rootDir); + const hex = '1234567890abcdef'.repeat(4); + expect(hex.length).toBe(64); + + const path = store.hashToPath(`sha256:${hex}`); + expect(path).toBe(join(rootDir, hex.slice(0, 2), hex.slice(2))); + }); + + it('returns null for malformed hashes', () => { + const store = new FileStore(rootDir); + expect(store.hashToPath('not-a-hash')).toBeNull(); + expect(store.hashToPath('sha256:short')).toBeNull(); + }); +}); diff --git a/packages/cli/test/import-file-integration.test.ts b/packages/cli/test/import-file-integration.test.ts new file mode 100644 index 000000000..1b6f038c7 --- /dev/null +++ b/packages/cli/test/import-file-integration.test.ts @@ -0,0 +1,1070 @@ +/** + * Integration tests for the POST /api/assertion/:name/import-file orchestration. + * + * These tests exercise the full Phase 1 โ†’ Phase 2 โ†’ assertion.write pipeline + * without spinning up a full DKGAgent (which needs libp2p + chain). Instead + * we drive the exact sequence of operations the route handler does: + * + * 1. parseMultipart(body, boundary) + * 2. fileStore.put(filePart.content, detectedContentType) + * 3. branch on detectedContentType: + * - text/markdown โ†’ raw bytes as mdIntermediate + * - registered converter โ†’ converter.extract(...) + * - neither โ†’ graceful degrade, status="skipped" + * 4. extractFromMarkdown({ markdown, agentDid, ontologyRef, documentIri }) + * 5. mockAgent.assertion.write(contextGraphId, name, triples) + * 6. record in extractionStatus Map + * + * The mock agent captures the assertion.write call arguments for verification. + * The real FileStore (on a temp dir), real extractionRegistry, real + * extractFromMarkdown, real parseMultipart are all used. + * + * This covers the same behaviors the daemon route handler implements, minus the + * HTTP parsing/validation shell (which is tested indirectly via the multipart + * unit tests plus the bits the daemon compiles against). 
+ */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtemp, rm, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { existsSync } from 'node:fs'; +import { + ExtractionPipelineRegistry, + type ExtractionPipeline, + type ExtractionInput, + type ConverterOutput, + contextGraphAssertionUri, +} from '@origintrail-official/dkg-core'; +import { FileStore } from '../src/file-store.js'; +import type { ExtractionStatusRecord } from '../src/extraction-status.js'; +import { parseBoundary, parseMultipart } from '../src/http/multipart.js'; +import { extractFromMarkdown } from '../src/extraction/markdown-extractor.js'; + +// โ”€โ”€ Test fixture types (mirroring the ExtractionStatusRecord in daemon.ts) โ”€โ”€ + +interface CapturedAssertionWrite { + contextGraphId: string; + name: string; + triples: Array<{ subject: string; predicate: string; object: string }>; + subGraphName?: string; +} + +interface MockAgent { + peerId: string; + listSubGraphs: (contextGraphId: string) => Promise>; + assertion: { + create: ( + contextGraphId: string, + name: string, + opts?: { subGraphName?: string }, + ) => Promise; + write: ( + contextGraphId: string, + name: string, + triples: Array<{ subject: string; predicate: string; object: string }>, + opts?: { subGraphName?: string }, + ) => Promise; + }; + capturedWrites: CapturedAssertionWrite[]; + createdAssertions: Array<{ contextGraphId: string; name: string; subGraphName?: string }>; +} + +interface MockAgentOptions { + createError?: Error; + writeError?: Error; + registeredSubGraphs?: string[]; +} + +function makeMockAgent(peerId = '0xMockAgentPeerId', options: MockAgentOptions = {}): MockAgent { + const capturedWrites: CapturedAssertionWrite[] = []; + const createdAssertions: Array<{ contextGraphId: string; name: string; subGraphName?: string }> = []; + return { + peerId, + capturedWrites, + createdAssertions, + async listSubGraphs(): Promise> { + return (options.registeredSubGraphs ?? []).map(name => ({ name })); + }, + assertion: { + async create(contextGraphId: string, name: string, opts?: { subGraphName?: string }): Promise { + if (options.createError) throw options.createError; + createdAssertions.push({ contextGraphId, name, subGraphName: opts?.subGraphName }); + return contextGraphAssertionUri(contextGraphId, peerId, name, opts?.subGraphName); + }, + async write( + contextGraphId: string, + name: string, + triples: Array<{ subject: string; predicate: string; object: string }>, + opts?: { subGraphName?: string }, + ): Promise { + if (options.writeError) throw options.writeError; + capturedWrites.push({ contextGraphId, name, triples, subGraphName: opts?.subGraphName }); + }, + }, + }; +} + +// โ”€โ”€ The orchestration under test (matches daemon.ts import-file handler) โ”€โ”€ + +interface ImportFileResult { + assertionUri: string; + fileHash: string; + detectedContentType: string; + extraction: { + status: 'completed' | 'skipped' | 'failed'; + tripleCount: number; + pipelineUsed: string | null; + mdIntermediateHash?: string; + error?: string; + }; +} + +class ImportFileRouteError extends Error { + readonly statusCode: number; + readonly body: ImportFileResult; + + constructor(statusCode: number, body: ImportFileResult) { + super(body.extraction.error ?? 
`Import-file request failed with status ${statusCode}`);
+    this.statusCode = statusCode;
+    this.body = body;
+  }
+}
+
+function buildImportFileResponse(args: {
+  assertionUri: string;
+  fileHash: string;
+  detectedContentType: string;
+  extraction: ImportFileResult['extraction'];
+}): ImportFileResult {
+  return {
+    assertionUri: args.assertionUri,
+    fileHash: args.fileHash,
+    detectedContentType: args.detectedContentType,
+    extraction: {
+      status: args.extraction.status,
+      tripleCount: args.extraction.tripleCount,
+      pipelineUsed: args.extraction.pipelineUsed,
+      ...(args.extraction.mdIntermediateHash ? { mdIntermediateHash: args.extraction.mdIntermediateHash } : {}),
+      ...(args.extraction.error ? { error: args.extraction.error } : {}),
+    },
+  };
+}
+
+function normalizeDetectedContentType(contentType: string | undefined): string {
+  const normalized = contentType?.split(';', 1)[0]?.trim().toLowerCase();
+  return normalized && normalized.length > 0 ? normalized : 'application/octet-stream';
+}
+
+async function runImportFileOrchestration(params: {
+  agent: MockAgent;
+  fileStore: FileStore;
+  extractionRegistry: ExtractionPipelineRegistry;
+  extractionStatus: Map<string, ExtractionStatusRecord>;
+  multipartBody: Buffer;
+  boundary: string;
+  assertionName: string;
+  onInProgress?: (assertionUri: string, record: ExtractionStatusRecord) => void | Promise<void>;
+}): Promise<ImportFileResult> {
+  const { agent, fileStore, extractionRegistry, extractionStatus, multipartBody, boundary, assertionName, onInProgress } = params;
+
+  const fields = parseMultipart(multipartBody, boundary);
+  const filePart = fields.find(f => f.name === 'file' && f.filename !== undefined)!;
+  const textField = (name: string): string | undefined => {
+    const f = fields.find(x => x.name === name && x.filename === undefined);
+    return f ? f.content.toString('utf-8') : undefined;
+  };
+  const contextGraphId = textField('contextGraphId')!;
+  const contentTypeOverrideRaw = textField('contentType');
+  // Mirror the daemon: blank `contentType=` is treated as absent.
+  const contentTypeOverride =
+    contentTypeOverrideRaw && contentTypeOverrideRaw.trim().length > 0
+      ? contentTypeOverrideRaw
+      : undefined;
+  const ontologyRef = textField('ontologyRef');
+  const subGraphName = textField('subGraphName');
+  const detectedContentType = normalizeDetectedContentType(contentTypeOverride ?? filePart.contentType);
+  if (subGraphName) {
+    const registeredSubGraphs = await agent.listSubGraphs(contextGraphId);
+    if (!registeredSubGraphs.some(subGraph => subGraph.name === subGraphName)) {
+      throw new Error(`Sub-graph "${subGraphName}" has not been registered in context graph "${contextGraphId}". Call createSubGraph() first.`);
+    }
+  }
+
+  const fileStoreEntry = await fileStore.put(filePart.content, detectedContentType);
+  const assertionUri = contextGraphAssertionUri(contextGraphId, agent.peerId, assertionName, subGraphName);
+  const startedAt = new Date().toISOString();
+
+  let mdIntermediate: string | null = null;
+  let pipelineUsed: string | null = null;
+  let mdIntermediateHash: string | undefined;
+  const recordInProgress = async (): Promise<void> => {
+    const record: ExtractionStatusRecord = {
+      status: 'in_progress',
+      fileHash: fileStoreEntry.hash,
+      detectedContentType,
+      pipelineUsed,
+      tripleCount: 0,
+      ...(mdIntermediateHash ?
{ mdIntermediateHash } : {}), + startedAt, + }; + extractionStatus.set(assertionUri, record); + if (onInProgress) { + await onInProgress(assertionUri, record); + } + }; + const recordFailed = (error: string, tripleCount: number, failedPipelineUsed: string | null = pipelineUsed): void => { + extractionStatus.set(assertionUri, { + status: 'failed', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed: failedPipelineUsed, + tripleCount, + ...(mdIntermediateHash ? { mdIntermediateHash } : {}), + error, + startedAt, + completedAt: new Date().toISOString(), + }); + }; + const fail = (statusCode: number, error: string, tripleCount: number, failedPipelineUsed: string | null = pipelineUsed): never => { + recordFailed(error, tripleCount, failedPipelineUsed); + throw new ImportFileRouteError(statusCode, buildImportFileResponse({ + assertionUri, + fileHash: fileStoreEntry.hash, + detectedContentType, + extraction: { + status: 'failed', + tripleCount, + pipelineUsed: failedPipelineUsed, + ...(mdIntermediateHash ? { mdIntermediateHash } : {}), + error, + }, + })); + }; + + await recordInProgress(); + + if (detectedContentType === 'text/markdown') { + mdIntermediate = filePart.content.toString('utf-8'); + pipelineUsed = 'text/markdown'; + await recordInProgress(); + } else { + const converter = extractionRegistry.get(detectedContentType); + if (converter) { + const { mdIntermediate: md } = await converter.extract({ + filePath: fileStoreEntry.path, + contentType: detectedContentType, + ontologyRef, + agentDid: `did:dkg:agent:${agent.peerId}`, + }); + mdIntermediate = md; + pipelineUsed = detectedContentType; + const mdEntry = await fileStore.put(Buffer.from(md, 'utf-8'), 'text/markdown'); + mdIntermediateHash = mdEntry.hash; + await recordInProgress(); + } + } + + // Graceful degrade + if (mdIntermediate === null) { + const skippedRecord: ExtractionStatusRecord = { + status: 'skipped', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed: null, + tripleCount: 0, + startedAt, + completedAt: new Date().toISOString(), + }; + extractionStatus.set(assertionUri, skippedRecord); + return buildImportFileResponse({ + assertionUri, + fileHash: fileStoreEntry.hash, + detectedContentType, + extraction: { status: 'skipped', tripleCount: 0, pipelineUsed: null }, + }); + } + + // Phase 2 + let triples: ReturnType['triples']; + let provenance: ReturnType['provenance']; + try { + const result = extractFromMarkdown({ + markdown: mdIntermediate, + agentDid: `did:dkg:agent:${agent.peerId}`, + ontologyRef, + documentIri: assertionUri, + }); + triples = result.triples; + provenance = result.provenance; + } catch (err: any) { + fail(500, `Phase 2 extraction failed: ${err.message}`, 0); + } + + const allTriples = [...triples, ...provenance]; + try { + try { + await agent.assertion.create(contextGraphId, assertionName, subGraphName ? { subGraphName } : undefined); + } catch (err: any) { + const message = err?.message ?? String(err); + if (!(message.includes('already exists') || message.includes('duplicate') || message.includes('conflict'))) { + if (message.includes('has not been registered') || message.includes('Invalid') || message.includes('Unsafe')) { + fail(400, message, triples.length); + } + fail(500, message, triples.length); + } + } + if (allTriples.length > 0) { + await agent.assertion.write( + contextGraphId, + assertionName, + allTriples.map(t => ({ subject: t.subject, predicate: t.predicate, object: t.object })), + subGraphName ? 
{ subGraphName } : undefined, + ); + } + } catch (err: any) { + if (err.message?.includes('has not been registered') || err.message?.includes('Invalid') || err.message?.includes('Unsafe')) { + fail(400, err.message, triples.length); + } + // Unexpected write-stage failure: mirror the daemon by recording the + // failure before rethrowing, so the extraction status map doesn't stay + // stuck at in_progress. + recordFailed(err?.message ?? String(err), triples.length); + throw err; + } + + const completedRecord: ExtractionStatusRecord = { + status: 'completed', + fileHash: fileStoreEntry.hash, + detectedContentType, + pipelineUsed, + tripleCount: triples.length, + mdIntermediateHash, + startedAt, + completedAt: new Date().toISOString(), + }; + extractionStatus.set(assertionUri, completedRecord); + + return buildImportFileResponse({ + assertionUri, + fileHash: fileStoreEntry.hash, + detectedContentType, + extraction: { + status: 'completed', + tripleCount: triples.length, + pipelineUsed, + ...(mdIntermediateHash ? { mdIntermediateHash } : {}), + }, + }); +} + +// โ”€โ”€ Multipart body builder for tests โ”€โ”€ + +const BOUNDARY = '----dkgimporttest'; +const CRLF = '\r\n'; + +function buildMultipart(parts: Array< + | { kind: 'text'; name: string; value: string } + | { kind: 'file'; name: string; filename: string; contentType: string; content: Buffer } +>): Buffer { + const segments: Buffer[] = []; + for (const p of parts) { + segments.push(Buffer.from(`--${BOUNDARY}${CRLF}`)); + if (p.kind === 'text') { + segments.push(Buffer.from(`Content-Disposition: form-data; name="${p.name}"${CRLF}${CRLF}${p.value}`)); + } else { + segments.push(Buffer.from( + `Content-Disposition: form-data; name="${p.name}"; filename="${p.filename}"${CRLF}` + + `Content-Type: ${p.contentType}${CRLF}${CRLF}`, + )); + segments.push(p.content); + } + segments.push(Buffer.from(CRLF)); + } + segments.push(Buffer.from(`--${BOUNDARY}--${CRLF}`)); + return Buffer.concat(segments); +} + +// โ”€โ”€ Tests โ”€โ”€ + +describe('import-file orchestration โ€” happy paths', () => { + let tmpDir: string; + let fileStore: FileStore; + let registry: ExtractionPipelineRegistry; + let status: Map; + let agent: MockAgent; + + beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), 'dkg-importfile-test-')); + fileStore = new FileStore(join(tmpDir, 'files')); + registry = new ExtractionPipelineRegistry(); + status = new Map(); + agent = makeMockAgent(); + }); + + afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); + }); + + it('text/markdown upload โ€” skips Phase 1, runs Phase 2, writes triples to assertion', async () => { + const markdown = [ + '---', + 'id: research-note', + 'type: ScholarlyArticle', + 'title: Climate Report 2026', + 'description: A short climate analysis', + '---', + '', + '# Climate Report 2026', + '', + 'Global temperature rose by 1.2ยฐC. 
See [[Paris Agreement]] and #climate topics.', + '', + '## Background', + '', + 'status:: draft', + '', + '## Methods', + '', + 'Sampled historical records.', + '', + ].join('\n'); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'research-cg' }, + { kind: 'file', name: 'file', filename: 'climate.md', contentType: 'text/markdown', content: Buffer.from(markdown, 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'climate-report', + }); + + // Response shape + expect(result.extraction.status).toBe('completed'); + expect(result.extraction.pipelineUsed).toBe('text/markdown'); + expect(result.extraction.tripleCount).toBeGreaterThan(0); + expect(result.fileHash).toMatch(/^sha256:[0-9a-f]{64}$/); + expect(result.detectedContentType).toBe('text/markdown'); + expect(result.extraction.mdIntermediateHash).toBeUndefined(); // no Phase 1, no MD intermediate stored separately + expect(result.assertionUri).toBe(contextGraphAssertionUri('research-cg', agent.peerId, 'climate-report')); + + // Assertion write happened + expect(agent.createdAssertions).toHaveLength(1); + expect(agent.createdAssertions[0]).toEqual({ contextGraphId: 'research-cg', name: 'climate-report', subGraphName: undefined }); + expect(agent.capturedWrites).toHaveLength(1); + expect(agent.capturedWrites[0].contextGraphId).toBe('research-cg'); + expect(agent.capturedWrites[0].name).toBe('climate-report'); + + // Triples reflect the markdown structure + const writtenTriples = agent.capturedWrites[0].triples; + // rdf:type ScholarlyArticle + expect(writtenTriples.some(t => + t.predicate === 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' && + t.object === 'http://schema.org/ScholarlyArticle', + )).toBe(true); + // schema:name from frontmatter title + expect(writtenTriples.some(t => + t.predicate === 'http://schema.org/name' && + t.object === '"Climate Report 2026"', + )).toBe(true); + // wikilink mention + expect(writtenTriples.some(t => + t.predicate === 'http://schema.org/mentions' && + t.object === 'urn:dkg:md:paris-agreement', + )).toBe(true); + // hashtag as keyword + expect(writtenTriples.some(t => + t.predicate === 'http://schema.org/keywords' && + t.object === '"climate"', + )).toBe(true); + // dataview field + expect(writtenTriples.some(t => + t.predicate === 'http://schema.org/status' && + t.object === '"draft"', + )).toBe(true); + // section headings + expect(writtenTriples.some(t => + t.predicate === 'http://dkg.io/ontology/hasSection', + )).toBe(true); + + // Status map populated + expect(status.size).toBe(1); + const record = status.get(result.assertionUri)!; + expect(record.status).toBe('completed'); + expect(record.fileHash).toBe(result.fileHash); + expect(record.pipelineUsed).toBe('text/markdown'); + expect(record.tripleCount).toBe(result.extraction.tripleCount); + }); + + it('text/markdown upload uses filePart content type when contentType field is not provided', async () => { + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'doc', + }); + + 
expect(result.extraction.status).toBe('completed');
+    expect(result.extraction.pipelineUsed).toBe('text/markdown');
+    expect(result.detectedContentType).toBe('text/markdown');
+  });
+
+  it('normalizes markdown media types with parameters and casing before Phase 1 routing', async () => {
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'cg' },
+      { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'Text/Markdown; charset=utf-8', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') },
+    ]);
+
+    const result = await runImportFileOrchestration({
+      agent, fileStore, extractionRegistry: registry, extractionStatus: status,
+      multipartBody: body, boundary: BOUNDARY, assertionName: 'doc',
+    });
+
+    expect(result.detectedContentType).toBe('text/markdown');
+    expect(result.extraction.status).toBe('completed');
+    expect(result.extraction.pipelineUsed).toBe('text/markdown');
+  });
+
+  it('contentType text field overrides the file part Content-Type header', async () => {
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'cg' },
+      { kind: 'text', name: 'contentType', value: 'text/markdown' },
+      // File reports application/octet-stream, but the override tells the handler to treat it as markdown
+      { kind: 'file', name: 'file', filename: 'doc.bin', contentType: 'application/octet-stream', content: Buffer.from('# Hello\n\nWorld.\n', 'utf-8') },
+    ]);
+
+    const result = await runImportFileOrchestration({
+      agent, fileStore, extractionRegistry: registry, extractionStatus: status,
+      multipartBody: body, boundary: BOUNDARY, assertionName: 'override-test',
+    });
+
+    expect(result.detectedContentType).toBe('text/markdown');
+    expect(result.extraction.status).toBe('completed');
+    expect(result.extraction.pipelineUsed).toBe('text/markdown');
+  });
+
+  it('registered converter path — runs Phase 1, stores MD intermediate, runs Phase 2', async () => {
+    // Register a stub converter for application/pdf that converts "fake-pdf" bytes to real markdown
+    const stubConverter: ExtractionPipeline = {
+      contentTypes: ['application/pdf'],
+      async extract(_input: ExtractionInput): Promise<ConverterOutput> {
+        return {
+          mdIntermediate: [
+            '---',
+            'id: stub-doc',
+            'type: Report',
+            '---',
+            '',
+            '# Stub Document',
+            '',
+            'Body with #tag1 and [[Reference]].',
+            '',
+          ].join('\n'),
+        };
+      },
+    };
+    registry.register(stubConverter);
+
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'research' },
+      { kind: 'file', name: 'file', filename: 'paper.pdf', contentType: 'application/pdf', content: Buffer.from('fake-pdf-bytes', 'utf-8') },
+    ]);
+
+    const result = await runImportFileOrchestration({
+      agent, fileStore, extractionRegistry: registry, extractionStatus: status,
+      multipartBody: body, boundary: BOUNDARY, assertionName: 'paper',
+    });
+
+    expect(result.extraction.status).toBe('completed');
+    expect(result.extraction.pipelineUsed).toBe('application/pdf');
+    expect(result.extraction.mdIntermediateHash).toBeDefined();
+    expect(result.extraction.mdIntermediateHash).toMatch(/^sha256:[0-9a-f]{64}$/);
+    expect(result.extraction.mdIntermediateHash).not.toBe(result.fileHash); // stored separately
+
+    // MD intermediate is retrievable from the file store
+    const mdBytes = await fileStore.get(result.extraction.mdIntermediateHash!);
+    expect(mdBytes).not.toBeNull();
+    expect(mdBytes!.toString('utf-8')).toContain('# Stub Document');
+
+    // Triples reflect the Phase 2 extraction of the stub's MD intermediate
+    const triples = agent.capturedWrites[0].triples;
+    expect(triples.some(t => t.object === 'http://schema.org/Report')).toBe(true);
+    expect(triples.some(t => t.object === '"tag1"')).toBe(true);
+    expect(triples.some(t => t.object === 'urn:dkg:md:reference')).toBe(true);
+  });
+
+  it('normalizes converter media types before registry lookup', async () => {
+    const stubConverter: ExtractionPipeline = {
+      contentTypes: ['application/pdf'],
+      async extract(_input: ExtractionInput): Promise<ConverterOutput> {
+        return { mdIntermediate: '# Converted\n\nBody.\n' };
+      },
+    };
+    registry.register(stubConverter);
+
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'research' },
+      { kind: 'file', name: 'file', filename: 'paper.pdf', contentType: 'Application/PDF; charset=binary', content: Buffer.from('fake-pdf-bytes', 'utf-8') },
+    ]);
+
+    const result = await runImportFileOrchestration({
+      agent, fileStore, extractionRegistry: registry, extractionStatus: status,
+      multipartBody: body, boundary: BOUNDARY, assertionName: 'paper-normalized',
+    });
+
+    expect(result.detectedContentType).toBe('application/pdf');
+    expect(result.extraction.status).toBe('completed');
+    expect(result.extraction.pipelineUsed).toBe('application/pdf');
+    expect(result.extraction.mdIntermediateHash).toBeDefined();
+  });
+
+  it('passes ontologyRef through to the converter and Phase 2 extractor', async () => {
+    let capturedOntologyRef: string | undefined;
+    const stubConverter: ExtractionPipeline = {
+      contentTypes: ['application/pdf'],
+      async extract(input: ExtractionInput): Promise<ConverterOutput> {
+        capturedOntologyRef = input.ontologyRef;
+        return { mdIntermediate: '# Doc\n\nBody.\n' };
+      },
+    };
+    registry.register(stubConverter);
+
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'research' },
+      { kind: 'text', name: 'ontologyRef', value: 'did:dkg:context-graph:research/_ontology' },
+      { kind: 'file', name: 'file', filename: 'paper.pdf', contentType: 'application/pdf', content: Buffer.from('pdf', 'utf-8') },
+    ]);
+
+    await runImportFileOrchestration({
+      agent, fileStore, extractionRegistry: registry, extractionStatus: status,
+      multipartBody: body, boundary: BOUNDARY, assertionName: 'paper',
+    });
+
+    expect(capturedOntologyRef).toBe('did:dkg:context-graph:research/_ontology');
+  });
+
+  it('passes subGraphName through to assertion.create and assertion.write', async () => {
+    agent = makeMockAgent('0xMockAgentPeerId', {
+      registeredSubGraphs: ['decisions'],
+    });
+
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'cg' },
+      { kind: 'text', name: 'subGraphName', value: 'decisions' },
+      { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') },
+    ]);
+
+    await runImportFileOrchestration({
+      agent, fileStore, extractionRegistry: registry, extractionStatus: status,
+      multipartBody: body, boundary: BOUNDARY, assertionName: 'decision-1',
+    });
+
+    expect(agent.createdAssertions[0]).toEqual({ contextGraphId: 'cg', name: 'decision-1', subGraphName: 'decisions' });
+    expect(agent.capturedWrites[0].subGraphName).toBe('decisions');
+  });
+
+  it('seeds an in-progress extraction status before the terminal record is written', async () => {
+    const body = buildMultipart([
+      { kind: 'text', name: 'contextGraphId', value: 'cg' },
+      { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') },
+    ]);
+
+    let observedInProgress = false;
+    const result
= await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'in-progress-doc', + async onInProgress(assertionUri, record) { + observedInProgress = true; + expect(assertionUri).toBe(contextGraphAssertionUri('cg', agent.peerId, 'in-progress-doc')); + expect(record.status).toBe('in_progress'); + expect(record.completedAt).toBeUndefined(); + expect(status.get(assertionUri)?.status).toBe('in_progress'); + }, + }); + + expect(observedInProgress).toBe(true); + expect(status.get(result.assertionUri)?.status).toBe('completed'); + }); + + it('creates the assertion graph even when Phase 2 extracts zero triples', async () => { + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'empty.md', contentType: 'text/markdown', content: Buffer.from('', 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'empty-doc', + }); + + expect(result.extraction.status).toBe('completed'); + expect(result.extraction.tripleCount).toBe(0); + expect(agent.createdAssertions).toHaveLength(1); + expect(agent.createdAssertions[0]).toEqual({ contextGraphId: 'cg', name: 'empty-doc', subGraphName: undefined }); + expect(agent.capturedWrites).toHaveLength(0); + }); + + it('records failed extraction status when assertion.create rejects an unregistered sub-graph', async () => { + agent = makeMockAgent('0xMockAgentPeerId', { + registeredSubGraphs: ['decisions'], + createError: new Error('Sub-graph "decisions" has not been registered in context graph "cg". Call createSubGraph() first.'), + }); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'text', name: 'subGraphName', value: 'decisions' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + await expect(runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'decision-1', + })).rejects.toThrow('has not been registered'); + + const assertionUri = contextGraphAssertionUri('cg', agent.peerId, 'decision-1', 'decisions'); + const record = status.get(assertionUri); + expect(record).toBeDefined(); + expect(record?.status).toBe('failed'); + expect(record?.error).toContain('has not been registered'); + expect(record?.tripleCount).toBeGreaterThan(0); + }); + + it('surfaces non-idempotent assertion.create failures as failed imports', async () => { + agent = makeMockAgent('0xMockAgentPeerId', { + createError: new Error('Storage backend unavailable'), + }); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'empty.md', contentType: 'text/markdown', content: Buffer.from('', 'utf-8') }, + ]); + + let caught: unknown; + try { + await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'create-runtime-failure', + }); + } catch (err) { + caught = err; + } + + expect(caught).toBeInstanceOf(ImportFileRouteError); + const routeError = caught as ImportFileRouteError; + expect(routeError.statusCode).toBe(500); + 
expect(routeError.body.extraction.status).toBe('failed'); + expect(routeError.body.extraction.error).toBe('Storage backend unavailable'); + + const assertionUri = contextGraphAssertionUri('cg', agent.peerId, 'create-runtime-failure'); + const record = status.get(assertionUri); + expect(record?.status).toBe('failed'); + expect(record?.error).toBe('Storage backend unavailable'); + expect(record?.tripleCount).toBe(0); + }); + + it('treats explicit already-exists assertion.create failures as idempotent', async () => { + agent = makeMockAgent('0xMockAgentPeerId', { + createError: new Error('Assertion graph already exists'), + }); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'create-idempotent', + }); + + expect(result.extraction.status).toBe('completed'); + expect(agent.capturedWrites).toHaveLength(1); + expect(status.get(result.assertionUri)?.status).toBe('completed'); + }); + + it('rejects an unregistered sub-graph before storing the upload blob', async () => { + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'text', name: 'subGraphName', value: 'decisions' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + await expect(runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'unregistered-preflight', + })).rejects.toThrow('has not been registered'); + + expect(existsSync(fileStore.directory)).toBe(false); + }); + + it('records failed extraction status when assertion.write rejects invalid triples', async () => { + agent = makeMockAgent('0xMockAgentPeerId', { + writeError: new Error('Invalid triple object'), + }); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + await expect(runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'invalid-write', + })).rejects.toThrow('Invalid triple object'); + + const assertionUri = contextGraphAssertionUri('cg', agent.peerId, 'invalid-write'); + const record = status.get(assertionUri); + expect(record).toBeDefined(); + expect(record?.status).toBe('failed'); + expect(record?.error).toBe('Invalid triple object'); + expect(record?.tripleCount).toBeGreaterThan(0); + }); + + it('treats a blank contentType form field as absent and falls back to the file part Content-Type', async () => { + // A client that submits `contentType=` (empty string) must NOT downgrade + // a real text/markdown upload to application/octet-stream โ€” the empty + // override should be ignored and the file part's own Content-Type used. 
+ const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'text', name: 'contentType', value: '' }, + { kind: 'file', name: 'file', filename: 'note.md', contentType: 'text/markdown', content: Buffer.from('# Heading\n\nBody text.\n', 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'blank-override', + }); + + expect(result.detectedContentType).toBe('text/markdown'); + expect(result.extraction.status).toBe('completed'); + expect(result.extraction.pipelineUsed).toBe('text/markdown'); + expect(result.extraction.tripleCount).toBeGreaterThan(0); + }); + + it('treats a whitespace-only contentType form field as absent', async () => { + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'text', name: 'contentType', value: ' ' }, + { kind: 'file', name: 'file', filename: 'note.md', contentType: 'text/markdown', content: Buffer.from('# Heading\n', 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'whitespace-override', + }); + + expect(result.detectedContentType).toBe('text/markdown'); + expect(result.extraction.status).toBe('completed'); + }); + + it('records failed extraction status when assertion.write throws an unexpected error', async () => { + // Errors that don't match the known has-not-been-registered / Invalid / Unsafe + // patterns must still update the extraction status record from in_progress to + // failed before the orchestration rethrows. Otherwise /extraction-status would + // stay stuck reporting in_progress even though the import already failed. 
+ agent = makeMockAgent('0xMockAgentPeerId', { + writeError: new Error('Connection refused'), + }); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + await expect(runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'unexpected-write', + })).rejects.toThrow('Connection refused'); + + const assertionUri = contextGraphAssertionUri('cg', agent.peerId, 'unexpected-write'); + const record = status.get(assertionUri); + expect(record).toBeDefined(); + expect(record?.status).toBe('failed'); + expect(record?.error).toBe('Connection refused'); + expect(record?.tripleCount).toBeGreaterThan(0); + expect(record?.completedAt).toBeDefined(); + }); + + it('returns the full import-file envelope for write-stage validation failures', async () => { + agent = makeMockAgent('0xMockAgentPeerId', { + writeError: new Error('Invalid triple object'), + }); + + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + let caught: unknown; + try { + await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'invalid-write-envelope', + }); + } catch (err) { + caught = err; + } + + expect(caught).toBeInstanceOf(ImportFileRouteError); + const routeError = caught as ImportFileRouteError; + expect(routeError.statusCode).toBe(400); + expect(routeError.body.assertionUri).toBe(contextGraphAssertionUri('cg', agent.peerId, 'invalid-write-envelope')); + expect(routeError.body.fileHash).toMatch(/^sha256:[0-9a-f]{64}$/); + expect(routeError.body.detectedContentType).toBe('text/markdown'); + expect(routeError.body.extraction.status).toBe('failed'); + expect(routeError.body.extraction.error).toBe('Invalid triple object'); + expect(routeError.body.extraction.tripleCount).toBeGreaterThan(0); + }); +}); + +describe('import-file orchestration โ€” graceful degrade', () => { + let tmpDir: string; + let fileStore: FileStore; + let registry: ExtractionPipelineRegistry; + let status: Map; + let agent: MockAgent; + + beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), 'dkg-importfile-test-')); + fileStore = new FileStore(join(tmpDir, 'files')); + registry = new ExtractionPipelineRegistry(); + status = new Map(); + agent = makeMockAgent(); + }); + + afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); + }); + + it('unregistered content type โ€” stores file, returns status="skipped", writes no triples', async () => { + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'photo.png', contentType: 'image/png', content: Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]) }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'photo', + }); + + expect(result.extraction.status).toBe('skipped'); + expect(result.extraction.tripleCount).toBe(0); + expect(result.extraction.pipelineUsed).toBeNull(); + 
expect(result.extraction.mdIntermediateHash).toBeUndefined(); + expect(result.detectedContentType).toBe('image/png'); + + // File is still stored (retrievable via fileHash) + const retrieved = await fileStore.get(result.fileHash); + expect(retrieved).not.toBeNull(); + expect(retrieved![0]).toBe(0x89); // PNG magic byte preserved + + // No triples written to the assertion + expect(agent.createdAssertions).toHaveLength(0); + expect(agent.capturedWrites).toHaveLength(0); + + // Status record reflects the skip + const record = status.get(result.assertionUri)!; + expect(record.status).toBe('skipped'); + expect(record.pipelineUsed).toBeNull(); + expect(record.tripleCount).toBe(0); + }); + + it('unregistered content type with no content-type header โ€” defaults to application/octet-stream and skips', async () => { + // File part without a Content-Type header โ€” daemon defaults to application/octet-stream + const fileContent = Buffer.from('opaque', 'utf-8'); + const segments: Buffer[] = []; + segments.push(Buffer.from(`--${BOUNDARY}${CRLF}`)); + segments.push(Buffer.from(`Content-Disposition: form-data; name="contextGraphId"${CRLF}${CRLF}cg`)); + segments.push(Buffer.from(CRLF)); + segments.push(Buffer.from(`--${BOUNDARY}${CRLF}`)); + segments.push(Buffer.from(`Content-Disposition: form-data; name="file"; filename="opaque.bin"${CRLF}${CRLF}`)); + segments.push(fileContent); + segments.push(Buffer.from(CRLF)); + segments.push(Buffer.from(`--${BOUNDARY}--${CRLF}`)); + const body = Buffer.concat(segments); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'opaque-upload', + }); + + expect(result.detectedContentType).toBe('application/octet-stream'); + expect(result.extraction.status).toBe('skipped'); + expect(result.extraction.pipelineUsed).toBeNull(); + }); +}); + +describe('import-file orchestration โ€” boundary parsing', () => { + it('parseBoundary extracts boundary from the daemon-style header', () => { + expect(parseBoundary(`multipart/form-data; boundary=${BOUNDARY}`)).toBe(BOUNDARY); + }); + + it('parseBoundary rejects non-multipart requests', () => { + expect(parseBoundary('application/json')).toBeNull(); + }); +}); + +describe('import-file orchestration โ€” extraction-status semantics', () => { + let tmpDir: string; + let fileStore: FileStore; + let registry: ExtractionPipelineRegistry; + let status: Map; + let agent: MockAgent; + + beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), 'dkg-importfile-test-')); + fileStore = new FileStore(join(tmpDir, 'files')); + registry = new ExtractionPipelineRegistry(); + status = new Map(); + agent = makeMockAgent(); + }); + + afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); + }); + + it('populates the status record with startedAt/completedAt timestamps on success', async () => { + const body = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'doc.md', contentType: 'text/markdown', content: Buffer.from('# Title\n\nBody.\n', 'utf-8') }, + ]); + + const result = await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body, boundary: BOUNDARY, assertionName: 'doc', + }); + + const record = status.get(result.assertionUri)!; + expect(record.startedAt).toBeTruthy(); + expect(record.completedAt).toBeTruthy(); + expect(new 
Date(record.startedAt).getTime()).toBeLessThanOrEqual(new Date(record.completedAt!).getTime()); + }); + + it('keyed by assertionUri โ€” separate imports to different assertions get separate records', async () => { + const body1 = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'a.md', contentType: 'text/markdown', content: Buffer.from('# A\n\nBody a.\n', 'utf-8') }, + ]); + const body2 = buildMultipart([ + { kind: 'text', name: 'contextGraphId', value: 'cg' }, + { kind: 'file', name: 'file', filename: 'b.md', contentType: 'text/markdown', content: Buffer.from('# B\n\nBody b.\n', 'utf-8') }, + ]); + + await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body1, boundary: BOUNDARY, assertionName: 'doc-a', + }); + await runImportFileOrchestration({ + agent, fileStore, extractionRegistry: registry, extractionStatus: status, + multipartBody: body2, boundary: BOUNDARY, assertionName: 'doc-b', + }); + + expect(status.size).toBe(2); + const keys = [...status.keys()]; + expect(keys.some(k => k.endsWith('/doc-a'))).toBe(true); + expect(keys.some(k => k.endsWith('/doc-b'))).toBe(true); + }); +}); diff --git a/packages/cli/test/multipart.test.ts b/packages/cli/test/multipart.test.ts new file mode 100644 index 000000000..29202c284 --- /dev/null +++ b/packages/cli/test/multipart.test.ts @@ -0,0 +1,236 @@ +import { describe, it, expect } from 'vitest'; +import { parseBoundary, parseMultipart, MultipartParseError } from '../src/http/multipart.js'; + +const BOUNDARY = '----dkgtestboundary'; +const CRLF = '\r\n'; + +function buildBody(...parts: Buffer[]): Buffer { + const segments: Buffer[] = []; + for (const part of parts) { + segments.push(Buffer.from(`--${BOUNDARY}${CRLF}`)); + segments.push(part); + segments.push(Buffer.from(CRLF)); + } + segments.push(Buffer.from(`--${BOUNDARY}--${CRLF}`)); + return Buffer.concat(segments); +} + +function textPart(name: string, value: string): Buffer { + return Buffer.from( + `Content-Disposition: form-data; name="${name}"${CRLF}${CRLF}${value}`, + ); +} + +function filePart(name: string, filename: string, contentType: string, content: Buffer): Buffer { + const header = Buffer.from( + `Content-Disposition: form-data; name="${name}"; filename="${filename}"${CRLF}` + + `Content-Type: ${contentType}${CRLF}${CRLF}`, + ); + return Buffer.concat([header, content]); +} + +describe('parseBoundary', () => { + it('extracts boundary from a standard header', () => { + expect(parseBoundary('multipart/form-data; boundary=abc123')).toBe('abc123'); + }); + + it('extracts quoted boundaries', () => { + expect(parseBoundary('multipart/form-data; boundary="abc 123"')).toBe('abc 123'); + }); + + it('is case-insensitive on the media type', () => { + expect(parseBoundary('Multipart/Form-Data; boundary=xyz')).toBe('xyz'); + }); + + it('handles boundaries with dashes and punctuation', () => { + expect(parseBoundary('multipart/form-data; boundary=----WebKitFormBoundary7MA4YWxkTrZu0gW')).toBe('----WebKitFormBoundary7MA4YWxkTrZu0gW'); + }); + + it('returns null for missing header', () => { + expect(parseBoundary(undefined)).toBeNull(); + }); + + it('returns null for non-multipart content type', () => { + expect(parseBoundary('application/json')).toBeNull(); + }); + + it('returns null when boundary parameter is missing', () => { + expect(parseBoundary('multipart/form-data')).toBeNull(); + }); + + it('returns null for an array value (duplicated Content-Type 
headers)', () => { + // Node may deliver IncomingHttpHeaders['content-type'] as string[] when + // the client sends duplicated headers. Reject as ambiguous so the route + // handler returns a clean 400 instead of crashing in toLowerCase(). + expect(parseBoundary(['multipart/form-data; boundary=abc', 'application/json'])).toBeNull(); + expect(parseBoundary([] as unknown as string[])).toBeNull(); + }); +}); + +describe('parseMultipart โ€” Content-Disposition parameter parsing', () => { + it('rejects a part that has only filename= and no name=', () => { + // The `name=` parameter regex must be anchored to a real `;` boundary so + // it does not silently match the `name=` substring inside `filename=`. + // A part with only `filename="x"` should be rejected, not mis-routed as + // a field named "x". + const malformed = Buffer.concat([ + Buffer.from(`--${BOUNDARY}${CRLF}`), + Buffer.from(`Content-Disposition: form-data; filename="lonely.txt"${CRLF}${CRLF}contents`), + Buffer.from(CRLF), + Buffer.from(`--${BOUNDARY}--${CRLF}`), + ]); + expect(() => parseMultipart(malformed, BOUNDARY)).toThrow(MultipartParseError); + expect(() => parseMultipart(malformed, BOUNDARY)).toThrow(/without name/); + }); + + it('parses name= and filename= independently when both are present', () => { + const body = buildBody(filePart('attachment', 'doc.pdf', 'application/pdf', Buffer.from('PDF', 'utf-8'))); + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); + expect(fields[0].name).toBe('attachment'); + expect(fields[0].filename).toBe('doc.pdf'); + }); + + it('parses name= when filename= comes first in the Content-Disposition', () => { + // Order-independence: filename before name should still work because the + // anchored regex looks for `;\s*name=` (or start-of-string) regardless of + // position. 
+ const body = Buffer.concat([ + Buffer.from(`--${BOUNDARY}${CRLF}`), + Buffer.from(`Content-Disposition: form-data; filename="doc.pdf"; name="attachment"${CRLF}${CRLF}body`), + Buffer.from(CRLF), + Buffer.from(`--${BOUNDARY}--${CRLF}`), + ]); + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); + expect(fields[0].name).toBe('attachment'); + expect(fields[0].filename).toBe('doc.pdf'); + }); +}); + +describe('parseMultipart โ€” text fields', () => { + it('extracts a single text field', () => { + const body = buildBody(textPart('greeting', 'hello')); + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); + expect(fields[0].name).toBe('greeting'); + expect(fields[0].filename).toBeUndefined(); + expect(fields[0].contentType).toBeUndefined(); + expect(fields[0].content.toString('utf-8')).toBe('hello'); + }); + + it('extracts multiple text fields in order', () => { + const body = buildBody( + textPart('first', 'one'), + textPart('second', 'two'), + textPart('third', 'three'), + ); + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(3); + expect(fields.map(f => f.name)).toEqual(['first', 'second', 'third']); + expect(fields.map(f => f.content.toString('utf-8'))).toEqual(['one', 'two', 'three']); + }); + + it('handles empty text field values', () => { + const body = buildBody(textPart('empty', '')); + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); + expect(fields[0].content.length).toBe(0); + }); + + it('preserves CRLF-free text values', () => { + const body = buildBody(textPart('iri', 'did:dkg:context-graph:my-cg')); + const fields = parseMultipart(body, BOUNDARY); + expect(fields[0].content.toString('utf-8')).toBe('did:dkg:context-graph:my-cg'); + }); +}); + +describe('parseMultipart โ€” file fields', () => { + it('extracts a file part with filename and content-type', () => { + const fileContent = Buffer.from('# Markdown Document\n\nBody text.\n', 'utf-8'); + const body = buildBody(filePart('file', 'doc.md', 'text/markdown', fileContent)); + + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); + expect(fields[0].name).toBe('file'); + expect(fields[0].filename).toBe('doc.md'); + expect(fields[0].contentType).toBe('text/markdown'); + expect(fields[0].content.equals(fileContent)).toBe(true); + }); + + it('extracts binary file content without corruption', () => { + const binary = Buffer.from([0x00, 0xff, 0x7f, 0x80, 0x0a, 0x0d, 0x2d, 0x2d]); + const body = buildBody(filePart('file', 'binary.bin', 'application/octet-stream', binary)); + + const fields = parseMultipart(body, BOUNDARY); + expect(fields[0].content.equals(binary)).toBe(true); + }); + + it('does not treat boundary bytes inside file payload as the next multipart boundary', () => { + const payload = Buffer.from(`prefix--${BOUNDARY}--suffix`, 'utf-8'); + const body = buildBody(filePart('file', 'embedded-boundary.bin', 'application/octet-stream', payload)); + + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); + expect(fields[0].content.equals(payload)).toBe(true); + }); + + it('does not treat CRLF-prefixed boundary-like payload bytes as a real boundary unless followed by CRLF or --', () => { + const payload = Buffer.from(`prefix${CRLF}--${BOUNDARY}junk${CRLF}suffix`, 'utf-8'); + const body = buildBody(filePart('file', 'embedded-delimiter.bin', 'application/octet-stream', payload)); + + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(1); 
+ expect(fields[0].content.equals(payload)).toBe(true); + }); + + it('extracts mixed text and file parts in a single body', () => { + const fileContent = Buffer.from('file body', 'utf-8'); + const body = buildBody( + textPart('contextGraphId', 'my-cg'), + filePart('file', 'doc.pdf', 'application/pdf', fileContent), + textPart('ontologyRef', 'did:dkg:context-graph:my-cg/_ontology'), + ); + + const fields = parseMultipart(body, BOUNDARY); + expect(fields).toHaveLength(3); + expect(fields[0].name).toBe('contextGraphId'); + expect(fields[0].content.toString('utf-8')).toBe('my-cg'); + expect(fields[1].name).toBe('file'); + expect(fields[1].filename).toBe('doc.pdf'); + expect(fields[1].contentType).toBe('application/pdf'); + expect(fields[1].content.equals(fileContent)).toBe(true); + expect(fields[2].name).toBe('ontologyRef'); + expect(fields[2].content.toString('utf-8')).toBe('did:dkg:context-graph:my-cg/_ontology'); + }); +}); + +describe('parseMultipart โ€” error handling', () => { + it('throws on empty boundary', () => { + expect(() => parseMultipart(Buffer.alloc(0), '')).toThrow(MultipartParseError); + }); + + it('throws when no opening boundary is present', () => { + expect(() => parseMultipart(Buffer.from('random bytes'), BOUNDARY)).toThrow(/Missing opening boundary/); + }); + + it('throws on missing Content-Disposition header', () => { + const badPart = Buffer.from(`Content-Type: text/plain${CRLF}${CRLF}orphaned`); + const body = buildBody(badPart); + expect(() => parseMultipart(body, BOUNDARY)).toThrow(/missing Content-Disposition/); + }); + + it('throws on missing header terminator', () => { + const delim = `--${BOUNDARY}${CRLF}`; + const body = Buffer.concat([ + Buffer.from(delim), + Buffer.from(`Content-Disposition: form-data; name="x"`), // no CRLF CRLF + ]); + expect(() => parseMultipart(body, BOUNDARY)).toThrow(MultipartParseError); + }); + + it('throws when a part has no closing boundary', () => { + const body = Buffer.from(`--${BOUNDARY}${CRLF}Content-Disposition: form-data; name="x"${CRLF}${CRLF}orphaned`); + expect(() => parseMultipart(body, BOUNDARY)).toThrow(MultipartParseError); + }); +}); diff --git a/packages/cli/test/skill-endpoint.test.ts b/packages/cli/test/skill-endpoint.test.ts index 9833aa331..b9ae248fc 100644 --- a/packages/cli/test/skill-endpoint.test.ts +++ b/packages/cli/test/skill-endpoint.test.ts @@ -54,10 +54,10 @@ describe('SKILL.md file', () => { }); it('starts with Agent Skills YAML frontmatter', () => { - expect(skillContent).toMatch(/^---\n/); + expect(skillContent).toMatch(/^---\r?\n/); expect(skillContent).toContain('name: dkg-node'); expect(skillContent).toContain('description:'); - expect(skillContent).toMatch(/---\n\n/); + expect(skillContent).toMatch(/---\r?\n\r?\n/); }); it('contains the required DKG V10 sections', () => { @@ -96,9 +96,21 @@ describe('SKILL.md file', () => { }); it('marks planned endpoints clearly', () => { - expect(skillContent).toContain('๐Ÿšง Planned'); + // The Planned/๐Ÿšง markers in the skill doc cover context graph sub-resources + // and future agent profile endpoints โ€” NOT the assertion API, which ships + // as of PR #108 (create/write/query/promote/discard) and this PR (import-file, + // extraction-status). 
+    expect(skillContent).toContain('*(planned)*');
+  });
+
+  it('documents the now-shipped assertion API surface', () => {
     expect(skillContent).toContain('/api/assertion/create');
+    expect(skillContent).toContain('/api/assertion/{name}/write');
+    expect(skillContent).toContain('/api/assertion/{name}/query');
+    expect(skillContent).toContain('/api/assertion/{name}/promote');
+    expect(skillContent).toContain('/api/assertion/{name}/discard');
     expect(skillContent).toContain('/api/assertion/{name}/import-file');
+    expect(skillContent).toContain('/api/assertion/{name}/extraction-status');
   });
 
   it('documents error status codes', () => {
diff --git a/packages/core/src/extraction-pipeline.ts b/packages/core/src/extraction-pipeline.ts
index 99459f217..76e97569c 100644
--- a/packages/core/src/extraction-pipeline.ts
+++ b/packages/core/src/extraction-pipeline.ts
@@ -1,6 +1,16 @@
 /**
- * Pluggable extraction pipeline interface for converting non-RDF files
- * (PDF, DOCX, etc.) into Markdown intermediates and RDF triples.
+ * Pluggable extraction pipeline interfaces for the document ingestion flow.
+ *
+ * Two phases:
+ * - Phase 1 (converter): source file → Markdown intermediate.
+ *   Implemented by ExtractionPipeline (e.g. MarkItDownConverter).
+ * - Phase 2 (structural extraction): Markdown intermediate → RDF triples.
+ *   Runs directly in the import-file route handler — not through a
+ *   pluggable registry. See 19_MARKDOWN_CONTENT_TYPE.md.
+ *
+ * The route handler orchestrates both phases and returns an
+ * ExtractionOutput that composes Phase 1's mdIntermediate with
+ * Phase 2's triples and provenance.
  *
  * Spec: 05_PROTOCOL_EXTENSIONS.md §6.5
  */
@@ -23,42 +33,61 @@ export interface ExtractionInput {
   agentDid: string;
 }
 
+/**
+ * Phase 1 converter output. A converter is responsible ONLY for turning
+ * a source file into a Markdown intermediate. It does not produce triples.
+ */
+export interface ConverterOutput {
+  /** Markdown intermediate, stored alongside the original file and inspectable. */
+  mdIntermediate: string;
+}
+
+/**
+ * Composite Phase 1 + Phase 2 result produced by the import-file route
+ * handler. `mdIntermediate` is byte-for-byte what the converter returned;
+ * `triples` and `provenance` come from the Phase 2 Markdown extractor.
+ */
 export interface ExtractionOutput {
-  /** Markdown intermediate (stored alongside original, inspectable). */
   mdIntermediate: string;
-  /** Extracted RDF triples. */
   triples: Quad[];
-  /** dkg:ExtractionProvenance quads for semantically extracted triples. */
   provenance: Quad[];
 }
 
 export interface ExtractionPipeline {
-  /** MIME content types this pipeline handles. */
+  /** MIME content types this converter handles. */
   readonly contentTypes: string[];
 
-  /** Convert a file to Markdown intermediate + RDF triples. */
-  extract(input: ExtractionInput): Promise<ExtractionOutput>;
+  /** Convert a source file into a Markdown intermediate. Phase 1 only. */
+  extract(input: ExtractionInput): Promise<ConverterOutput>;
+}
+
+function normalizeContentType(contentType: string): string {
+  return contentType.split(';', 1)[0]?.trim().toLowerCase() ?? '';
 }
 
 /**
- * Registry that maps content types to extraction pipelines.
- * Nodes register pipelines at startup; the import-file endpoint
- * looks up the pipeline for the detected content type.
+ * Registry that maps content types to converter pipelines.
+ * Nodes register pipelines at startup; the import-file route handler
+ * looks up the pipeline for the detected content type and calls its
+ * Phase 1 `extract()`. Phase 2 is not registered — the handler runs
+ * it directly on the Markdown intermediate.
  */
 export class ExtractionPipelineRegistry {
   private readonly pipelines = new Map<string, ExtractionPipeline>();
 
   register(pipeline: ExtractionPipeline): void {
     for (const ct of pipeline.contentTypes) {
-      this.pipelines.set(ct, pipeline);
+      const normalized = normalizeContentType(ct);
+      if (normalized.length === 0) continue;
+      this.pipelines.set(normalized, pipeline);
     }
   }
 
   get(contentType: string): ExtractionPipeline | undefined {
-    return this.pipelines.get(contentType);
+    return this.pipelines.get(normalizeContentType(contentType));
   }
 
   has(contentType: string): boolean {
-    return this.pipelines.has(contentType);
+    return this.pipelines.has(normalizeContentType(contentType));
   }
 
   availableContentTypes(): string[] {
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index e8cf11798..9880bc37e 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -47,6 +47,7 @@ export {
 export {
   type Quad as ExtractionQuad,
   type ExtractionInput,
+  type ConverterOutput,
   type ExtractionOutput,
   type ExtractionPipeline,
   ExtractionPipelineRegistry,
diff --git a/packages/core/test/extraction-pipeline.test.ts b/packages/core/test/extraction-pipeline.test.ts
index 0d99aee6b..6acd86c4e 100644
--- a/packages/core/test/extraction-pipeline.test.ts
+++ b/packages/core/test/extraction-pipeline.test.ts
@@ -3,17 +3,15 @@ import {
   ExtractionPipelineRegistry,
   type ExtractionPipeline,
   type ExtractionInput,
-  type ExtractionOutput,
+  type ConverterOutput,
 } from '../src/extraction-pipeline.js';
 
-function makePipeline(contentTypes: string[], output?: Partial<ExtractionOutput>): ExtractionPipeline {
+function makePipeline(contentTypes: string[], output?: Partial<ConverterOutput>): ExtractionPipeline {
   return {
     contentTypes,
-    async extract(_input: ExtractionInput): Promise<ExtractionOutput> {
+    async extract(_input: ExtractionInput): Promise<ConverterOutput> {
       return {
         mdIntermediate: output?.mdIntermediate ?? '# Test',
-        triples: output?.triples ?? [],
-        provenance: output?.provenance ??
[], }; }, }; @@ -71,14 +69,22 @@ describe('ExtractionPipelineRegistry', () => { expect(registry.get('text/markdown')).toBe(mdPipeline); expect(registry.get('application/pdf')).toBe(pdfPipeline); }); + + it('normalizes casing and media-type parameters on registration and lookup', () => { + const registry = new ExtractionPipelineRegistry(); + const pipeline = makePipeline(['Application/PDF']); + registry.register(pipeline); + + expect(registry.has('application/pdf')).toBe(true); + expect(registry.get('APPLICATION/PDF; charset=utf-8')).toBe(pipeline); + expect(registry.availableContentTypes()).toEqual(['application/pdf']); + }); }); -describe('ExtractionPipeline interface', () => { - it('extract returns mdIntermediate, triples, and provenance', async () => { +describe('ExtractionPipeline interface (Phase 1 converter)', () => { + it('extract returns ConverterOutput with mdIntermediate only', async () => { const pipeline = makePipeline(['text/markdown'], { mdIntermediate: '# Hello\n\nWorld', - triples: [{ subject: 'urn:test:1', predicate: 'rdf:type', object: 'schema:Thing' }], - provenance: [{ subject: 'urn:prov:1', predicate: 'dkg:extractedBy', object: 'did:dkg:agent:0x123' }], }); const result = await pipeline.extract({ @@ -88,9 +94,9 @@ describe('ExtractionPipeline interface', () => { }); expect(result.mdIntermediate).toBe('# Hello\n\nWorld'); - expect(result.triples).toHaveLength(1); - expect(result.triples[0].subject).toBe('urn:test:1'); - expect(result.provenance).toHaveLength(1); + // Converter output must not carry triples/provenance โ€” those come from Phase 2. + expect((result as { triples?: unknown }).triples).toBeUndefined(); + expect((result as { provenance?: unknown }).provenance).toBeUndefined(); }); it('extract passes through ontologyRef when provided', async () => { @@ -99,7 +105,7 @@ describe('ExtractionPipeline interface', () => { contentTypes: ['application/pdf'], async extract(input) { capturedInput = input; - return { mdIntermediate: '', triples: [], provenance: [] }; + return { mdIntermediate: '' }; }, };