# config.server.example.yaml
# Vectorless Server — example configuration.
#
# Every value below has a sensible default. Override with env vars
# (VLS_* for server, VLE_* for engine) or edit this file.
#
# Precedence: env vars > YAML file > built-in defaults.
# ── Server ─────────────────────────────────────────────────────────
# Listen address, timeouts, and optional direct TLS for the server.
server:
  addr: ":8080"
  read_timeout: 30s
  write_timeout: 120s
  drain_timeout: 15s

  # Direct TLS (optional). Leave empty to terminate TLS at your proxy.
  tls:
    cert_file: ""
    key_file: ""
    min_version: "1.2"  # "1.2" or "1.3"
# ── Authentication ─────────────────────────────────────────────────
# Authentication mode and the key used when mode is "api_key".
auth:
  # "none"    — all requests are anonymous (default, for local dev).
  # "api_key" — require Authorization: Bearer <key>.
  mode: "none"
  api_key: ""  # set via VLS_AUTH_API_KEY in production
# ── Prometheus Metrics ─────────────────────────────────────────────
# Toggle for the Prometheus metrics endpoint.
metrics:
  enabled: true  # serves /metrics endpoint
# ── OpenTelemetry Tracing ──────────────────────────────────────────
# Trace export settings; disabled by default.
tracing:
  enabled: false
  endpoint: "localhost:4317"  # OTLP gRPC collector
  insecure: true  # disable TLS for local dev
  service_name: "vectorless-server"
  sample_rate: 1.0  # 0.0–1.0; 1.0 = sample everything
# ── Rate Limiting ──────────────────────────────────────────────────
# Request rate limiting; disabled by default.
rate_limit:
  enabled: false
  requests_per_minute: 600
# ── Engine Configuration ───────────────────────────────────────────
# Everything below is passed through to the vectorless engine.
# All keys in this section are children of `engine`.
engine:
  database:
    # Example local-dev connection string: embedded credentials and
    # sslmode=disable. Replace for any non-local deployment.
    url: "postgres://vectorless:vectorless@localhost:5432/vectorless?sslmode=disable"
    max_conns: 10

  storage:
    driver: "local"  # "local" or "s3"
    local:
      root: "./data/documents"
    # s3:
    #   endpoint: "http://localhost:9000"
    #   region: "us-east-1"
    #   bucket: "vectorless"
    #   access_key: "minioadmin"
    #   secret_key: "minioadmin"
    #   use_path_style: true

  queue:
    driver: "river"  # "river", "qstash", or "asynq"
    river:
      num_workers: 10
    # qstash:
    #   token: ""
    #   webhook_base_url: "https://your-server.com"
    #   current_signing_key: ""
    #   next_signing_key: ""
    # asynq:
    #   addr: "localhost:6379"
    #   password: ""
    #   db: 0
    #   concurrency: 20

  llm:
    driver: "anthropic"  # "anthropic", "openai", or "gemini"
    anthropic:
      # NOTE(review): the file header says engine settings use VLE_* env
      # vars, but the hint below names a VLS_* variable — confirm which
      # one the loader actually reads.
      api_key: ""  # set via VLS_ANTHROPIC_API_KEY
      model: "claude-sonnet-4-20250514"
      reasoning_model: ""
    # openai:
    #   api_key: ""
    #   model: "gpt-4o"
    #   reasoning_model: ""
    # gemini:
    #   api_key: ""
    #   model: "gemini-2.0-flash"
    #   reasoning_model: ""

  retrieval:
    strategy: "chunked-tree"  # "single-pass" or "chunked-tree"
    chunked_tree:
      max_tokens_per_call: 60000
      max_parallel_calls: 8
      include_sibling_breadcrumbs: true

  ingest:
    # The summarize and HyDE stages run concurrently. This caps the total
    # number of LLM calls in flight across both stages combined.
    # 0 disables the global cap; default is 12.
    global_llm_concurrency: 12

    # HyDE candidate-question generation per leaf section. Folded into
    # the retrieval prompt at query time to widen recall on queries that
    # don't echo the section's exact wording.
    hyde:
      enabled: true
      model: ""  # empty => same model as summarization
      num_questions: 5
      concurrency: 4

    # Multi-axis structured summaries (Phase 2.5). JSON-mode summarizer
    # returns {topics, entities, numbers, one_line}. The retrieval
    # prompt surfaces entities + numbers on the section line; the
    # one_line continues to populate the flat `summary` field for
    # backward compatibility.
    summary_axes:
      enabled: true
      max_topics: 4
      max_entities: 8
      max_numbers: 6

  # NOTE(review): placed under `engine` because this key has no top-level
  # section banner of its own — confirm against the config loader.
  log:
    level: "info"  # "debug", "info", "warn", "error"
    format: "json"  # "json" or "console"