Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# CI for Holmes: runs the Go unit tests, the contract/CLI validation, and a
# release dry run on every pull request and on pushes to main.
name: validate

on:
  pull_request:
  push:
    branches: [main]

# Read-only token: this workflow only checks out and builds the repository.
permissions:
  contents: read

jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.22'

      # The Makefile defines a `test` target (go test ./...) that no other
      # target depends on, so it must be invoked explicitly to run in CI.
      - name: Test
        run: make test

      - name: Validate
        run: make validate

      # release-dry-run depends on dist, which re-runs validate before packaging.
      - name: Release dry run
        run: make release-dry-run
40 changes: 40 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
.PHONY: build test validate dist release-dry-run clean

# Binary name and output directory for packaged artifacts.
BIN := holmes
DIST_DIR := dist

# Build metadata; each may be overridden from the environment or command line.
VERSION ?= 0.1.0-dev
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
GOOS ?= $(shell go env GOOS 2>/dev/null || uname -s | tr A-Z a-z)
GOARCH ?= $(shell go env GOARCH 2>/dev/null || uname -m)
DIST_NAME := $(BIN)_$(VERSION)_$(GOOS)_$(GOARCH)

# Stamped into package main's version/commit/date variables at link time.
LDFLAGS := -X main.version=$(VERSION) -X main.commit=$(COMMIT) -X main.date=$(DATE)

# Checksum command. sha256sum is GNU coreutils and is absent on stock macOS,
# which ships `shasum`; both print "<digest>  <file>".
SHA256SUM ?= $(shell command -v sha256sum >/dev/null 2>&1 && echo sha256sum || echo "shasum -a 256")

# Compile the CLI into bin/.
build:
	mkdir -p bin
	go build -ldflags "$(LDFLAGS)" -o bin/$(BIN) ./cmd/holmes

# Run the Go unit tests.
test:
	go test ./...

# Run the contract validator, then smoke-test every CLI subcommand.
# JSON output of the data-producing commands is captured under /tmp.
validate: build
	python3 tools/validate_holmes.py
	bin/$(BIN) --version
	bin/$(BIN) doctor
	bin/$(BIN) self-test
	bin/$(BIN) emit-evidence >/tmp/holmes-evidence.json
	bin/$(BIN) analyze examples/sample.txt >/tmp/holmes-analysis.json
	bin/$(BIN) search "truth and evidence" >/tmp/holmes-search.json
	bin/$(BIN) graph examples/sample.txt >/tmp/holmes-graph.json
	bin/$(BIN) govern examples/sample.txt >/tmp/holmes-govern.json

# Copy the validated binary into dist/ under its versioned name and write a
# checksum file next to it.
dist: validate
	mkdir -p $(DIST_DIR)
	cp bin/$(BIN) $(DIST_DIR)/$(DIST_NAME)
	(cd $(DIST_DIR) && $(SHA256SUM) $(DIST_NAME) > $(DIST_NAME).sha256)

# Build the release artifact without publishing anything.
release-dry-run: dist
	@echo "release dry-run complete: $(DIST_DIR)/$(DIST_NAME)"

# Remove build and packaging outputs.
clean:
	rm -rf bin $(DIST_DIR)
57 changes: 55 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,55 @@
# holmes
Holmes Language Intelligence Fabric: governed classical NLP, neural NLP, semantic search, retrieval, knowledge graphs, foundation-language services, guardrails, evals, and investigative agentic discovery.
# Holmes

Holmes is SocioProphet's open language intelligence fabric.

It is built to outgrow assistant-grade discovery by bringing classical NLP, neural NLP, semantic search, retrieval, knowledge graphs, foundation-language services, guardrails, evals, and investigative agentic discovery together under one governed product surface.

## Product thesis

Watson-style systems answer. Holmes investigates.

Holmes is not a chatbot wrapper, a loose model zoo, or a domain NLP repo. It is the governed language layer above search, evidence, retrieval, casefiles, semantic graphs, tools, models, evals, and agents.

## Product family

- **Holmes**: language intelligence fabric.
- **Sherlock Search**: discovery, retrieval, evidence search, and investigation engine.
- **221B**: casefile and workspace surface.
- **Mycroft**: model routing, policy intelligence, and strategic model/service selection.
- **Moriarty Bench**: adversarial eval and red-team harness.
- **Irene Shield**: privacy, masking, identity-sensitive redaction, and sensitive-context handling.
- **The Canon**: curated evidence corpus, provenance records, accepted facts, and source trust.
- **Deduction Engine**: synthesis, contradiction detection, claim extraction, fallacy analysis, and reasoning workflows.

## Layer stack

1. Ingestion: documents, web, OCR handoff, transcripts, tables, metadata, language detection.
2. Linguistic primitives: tokenization, lemmatization, POS, morphology, parsing, NER, normalization.
3. Rule and table techniques: matchers, gazetteers, taxonomies, table extraction, rule-based relation extraction.
4. Classical ML NLP: classifiers, clustering, topic models, sentiment baselines, calibration, explainability.
5. Neural NLP: transformers, embeddings, rerankers, span extraction, relation extraction, multilingual encoders.
6. Foundation language services: extraction, summarization, generation, translation, RAG answering, long-context analysis, tool planning.
7. Retrieval and knowledge: sparse/dense/hybrid retrieval, vector stores, GraphBrain, semantic-serdes, ontogenesis, Slash Topics, Sherlock Search.
8. Guardrails and governance: PII checks, source provenance, prompt-injection checks, policy gates, eval gates, factsheets, promotion records.
9. Agent and tool orchestration: tool contracts, agent identity, sessions, memory, MCP/A2A, execution traces, model routing.

## Repo role

This repo is the Holmes product surface and integration spine.

Normative cross-surface standards live in `SocioProphet/functional-model-surfaces`.
Runtime service deployment should graduate into `SocioProphet/prophet-platform` when contracts and smoke tests are stable.
Linux-native NLP lab execution belongs in `SociOS-Linux/nlplab`.
SourceOS carries clients and signed service references through `SourceOS-Linux/sourceos-model-carry`.

## Initial validation

```bash
make validate
```

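Expected output includes the following line (the CLI smoke checks that `make validate` runs also print a version line and JSON evidence):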

```text
OK: Holmes contracts validated
```
234 changes: 234 additions & 0 deletions cmd/holmes/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
package main

import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
)

// Build metadata reported by --version and embedded in emitted records.
// The values below are the defaults for a plain `go build`; the Makefile
// overrides them at link time with
// -ldflags "-X main.version=... -X main.commit=... -X main.date=...".
var (
version = "0.1.0-dev"
commit = "unknown"
date = "unknown"
)

// evidence is the JSON record that runEvidence prints for the doctor,
// self-test, and emit-evidence commands.
type evidence struct {
// Tool is the emitting tool's name; runEvidence sets it to "holmes".
Tool string `json:"tool"`
// Version, Commit, and BuildDate carry the package-level build metadata.
Version string `json:"version"`
Commit string `json:"commit"`
BuildDate string `json:"buildDate"`
// Repo is the source repository; runEvidence sets it to "SocioProphet/holmes".
Repo string `json:"repo"`
// Command is the subcommand that produced the record.
Command string `json:"command"`
// Status is the outcome; callers in this file pass "ok", "failed", or "not-yet-wired".
Status string `json:"status"`
// Details holds command-specific data and is omitted from the JSON when empty.
Details map[string]any `json:"details,omitempty"`
}

// analysisRecord is the JSON summary of one file, built by analyzeFile.
type analysisRecord struct {
// Tool and Version identify the producer ("holmes" and the build version).
Tool string `json:"tool"`
Version string `json:"version"`
// Status is set to "demo-analysis" by analyzeFile.
Status string `json:"status"`
// Path is the cleaned (filepath.Clean) path that was read.
Path string `json:"path"`
// Bytes is the length of the file content in bytes.
Bytes int `json:"bytes"`
// SHA256 is the hex-encoded SHA-256 digest of the file content.
SHA256 string `json:"sha256"`
// Lines is the line count as computed by countLines.
Lines int `json:"lines"`
// Words is the number of whitespace-separated fields in the content.
Words int `json:"words"`
// Components lists the layer names analyzeFile reports for the record.
Components []string `json:"components"`
// EvidenceRef is "evidence://holmes/" followed by stableID(SHA256).
EvidenceRef string `json:"evidenceRef"`
}

// usage writes the command synopsis, headed by the build version, to
// standard error. It does not exit; callers decide the exit code.
func usage() {
	const synopsis = `holmes %s

Usage:
holmes --version
holmes doctor
holmes self-test
holmes emit-evidence
holmes analyze <path>
holmes search <query>
holmes graph <path>
holmes govern <path>

`
	fmt.Fprintf(os.Stderr, synopsis, version)
}

// main dispatches on the first command-line argument. With no arguments or
// an unrecognized command it prints usage and exits with status 2.
func main() {
	argv := os.Args
	if len(argv) < 2 {
		usage()
		os.Exit(2)
	}

	switch command := argv[1]; command {
	case "--version", "version":
		fmt.Printf("holmes %s commit=%s date=%s\n", version, commit, date)
	case "doctor":
		runDoctor()
	case "self-test":
		runSelfTest()
	case "emit-evidence":
		details := map[string]any{"surface": "language-intelligence", "mode": "local"}
		runEvidence("emit-evidence", "ok", details)
	case "analyze":
		requireArgs(argv, 3)
		runAnalyze(argv[2])
	case "search":
		requireArgs(argv, 3)
		// Everything after the command is treated as one space-joined query.
		runSearch(strings.Join(argv[2:], " "))
	case "graph":
		requireArgs(argv, 3)
		runGraph(argv[2])
	case "govern":
		requireArgs(argv, 3)
		runGovern(argv[2])
	default:
		usage()
		os.Exit(2)
	}
}

// requireArgs returns normally when args has at least n elements; otherwise
// it prints usage and terminates the process with status 2.
func requireArgs(args []string, n int) {
	if len(args) >= n {
		return
	}
	usage()
	os.Exit(2)
}

// runDoctor emits a "doctor" evidence record listing every product
// component, all of which are reported as pending and none as wired.
func runDoctor() {
	components := []string{
		"sherlock-search",
		"221b",
		"mycroft-router",
		"moriarty-bench",
		"irene-shield",
		"the-canon",
		"deduction-engine",
	}
	runEvidence("doctor", "not-yet-wired", map[string]any{
		"components": components,
		// Non-nil empty slice so the JSON value is [] rather than null.
		"wired":   []string{},
		"pending": components,
	})
}

// runSelfTest checks that the example surface file exists relative to the
// working directory. It emits an "ok" record when the file can be stat'ed;
// otherwise it emits a "failed" record carrying the error and exits with
// status 1.
func runSelfTest() {
	const example = "examples/holmes-surface.json"

	_, statErr := os.Stat(example)
	if statErr == nil {
		runEvidence("self-test", "ok", map[string]any{"example": example})
		return
	}

	runEvidence("self-test", "failed", map[string]any{"error": statErr.Error()})
	os.Exit(1)
}

// runAnalyze prints the analysis record for path as JSON. If the file cannot
// be analyzed it prints a failure object instead and exits with status 1.
func runAnalyze(path string) {
	result, analyzeErr := analyzeFile(path)
	if analyzeErr == nil {
		printJSON(result)
		return
	}

	failure := map[string]any{"status": "failed", "error": analyzeErr.Error(), "path": path}
	printJSON(failure)
	os.Exit(1)
}

func runSearch(query string) {
printJSON(map[string]any{
"tool": "holmes",
"version": version,
"status": "not-yet-wired",
"query": query,
"engine": "sherlock-search",
"message": "Sherlock Search binding is declared but runtime search is not wired in this CLI skeleton.",
"evidenceId": stableID("search:" + query),
})
}

// runGraph analyzes the file at path and prints a placeholder JSON response
// for semantic-graph conversion, keyed by the file's SHA-256. If the file
// cannot be analyzed it prints a failure object and exits with status 1.
func runGraph(path string) {
	analysis, analyzeErr := analyzeFile(path)
	if analyzeErr != nil {
		failure := map[string]any{"status": "failed", "error": analyzeErr.Error(), "path": path}
		printJSON(failure)
		os.Exit(1)
	}

	response := make(map[string]any, 8)
	response["tool"] = "holmes"
	response["version"] = version
	response["status"] = "not-yet-wired"
	response["path"] = analysis.Path
	response["sha256"] = analysis.SHA256
	response["target"] = "language.graph.v1/ToSemanticGraph"
	response["message"] = "Semantic graph conversion is declared but not wired in this CLI skeleton."
	response["evidenceId"] = stableID("graph:" + analysis.SHA256)
	printJSON(response)
}

// runGovern analyzes the file at path and prints a placeholder JSON response
// for governance evaluation, keyed by the file's SHA-256. If the file cannot
// be analyzed it prints a failure object and exits with status 1.
func runGovern(path string) {
	analysis, analyzeErr := analyzeFile(path)
	if analyzeErr != nil {
		failure := map[string]any{"status": "failed", "error": analyzeErr.Error(), "path": path}
		printJSON(failure)
		os.Exit(1)
	}

	response := make(map[string]any, 8)
	response["tool"] = "holmes"
	response["version"] = version
	response["status"] = "not-yet-wired"
	response["path"] = analysis.Path
	response["sha256"] = analysis.SHA256
	response["target"] = "language.govern.v1/Evaluate"
	response["message"] = "Governance/eval binding is declared but not wired in this CLI skeleton."
	response["evidenceId"] = stableID("govern:" + analysis.SHA256)
	printJSON(response)
}

// analyzeFile reads the file at path (after filepath.Clean) and summarizes
// it: size in bytes, SHA-256 digest, line count, word count, and an evidence
// reference derived from the digest. On a read error it returns the zero
// record and that error unchanged.
func analyzeFile(path string) (analysisRecord, error) {
	target := filepath.Clean(path)

	content, readErr := os.ReadFile(target)
	if readErr != nil {
		return analysisRecord{}, readErr
	}

	digest := sha256.Sum256(content)
	hexDigest := hex.EncodeToString(digest[:])
	body := string(content)

	return analysisRecord{
		Tool:        "holmes",
		Version:     version,
		Status:      "demo-analysis",
		Path:        target,
		Bytes:       len(content),
		SHA256:      hexDigest,
		Lines:       countLines(body),
		Words:       len(strings.Fields(body)),
		Components:  []string{"ingestion", "linguistic-primitives", "evidence"},
		EvidenceRef: "evidence://holmes/" + stableID(hexDigest),
	}, nil
}

// countLines returns the number of lines in text. Empty text has zero
// lines; a final line without a trailing newline still counts as a line.
func countLines(text string) int {
	if len(text) == 0 {
		return 0
	}
	newlines := strings.Count(text, "\n")
	if text[len(text)-1] == '\n' {
		return newlines
	}
	// The last line is unterminated, so it is not covered by the newline count.
	return newlines + 1
}

func runEvidence(command, status string, details map[string]any) {
printJSON(evidence{
Tool: "holmes",
Version: version,
Commit: commit,
BuildDate: date,
Repo: "SocioProphet/holmes",
Command: command,
Status: status,
Details: details,
})
}

// stableID returns a deterministic 16-character lowercase hex identifier for
// value: the first 8 bytes of its SHA-256 digest.
func stableID(value string) string {
	digest := sha256.Sum256([]byte(value))
	// 8 bytes encode to exactly 16 hex characters.
	return hex.EncodeToString(digest[:8])
}

// printJSON writes value to standard output as indented JSON followed by a
// newline. It panics if the value cannot be encoded or written.
func printJSON(value any) {
	encoder := json.NewEncoder(os.Stdout)
	encoder.SetIndent("", " ")

	encodeErr := encoder.Encode(value)
	if encodeErr == nil {
		return
	}
	panic(errors.New("failed to encode JSON: " + encodeErr.Error()))
}
Loading
Loading