-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlocal.env.example
More file actions
61 lines (48 loc) · 1.78 KB
/
local.env.example
File metadata and controls
61 lines (48 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# ==============================================================================
# Local Development Configuration for activate-rag-vllm
# ==============================================================================
# Copy this file and customize it for your environment:
#   cp local.env.example my-config.env
#   ./run_local.sh --config my-config.env
# ==============================================================================
# ---- Runtime Selection ----
# Container runtime used to run the deployment.
# Options: auto, singularity, docker
#   'auto' prefers Singularity when it is available.
RUNMODE=auto
# ---- Deployment Type ----
# Which services to deploy.
# Options:
#   all  - vLLM + RAG
#   vllm - vLLM only
RUNTYPE=all
# ---- Model Configuration ----
# Where the model weights come from:
#   local       - use pre-downloaded weights (see MODEL_PATH)
#   huggingface - download the model (see HF_MODEL_ID / HF_TOKEN below)
MODEL_SOURCE=local
# For local models: full path to the model weights directory
MODEL_PATH=/models/Llama-3.1-8B-Instruct
# For HuggingFace models: uncomment the lines below in place of
# MODEL_SOURCE=local above. HF_MODEL_ID is the model ID; HF_TOKEN is optional
# (the value shown is a placeholder, not a real token).
#MODEL_SOURCE=huggingface
#HF_MODEL_ID=meta-llama/Llama-3.1-8B-Instruct
#HF_TOKEN=hf_xxxxxxxxxxxxx
# Where to cache downloaded models
# NOTE(review): a leading '~' is only expanded when a shell reads this file;
# confirm how run_local.sh loads it, or use an absolute path instead.
MODEL_CACHE_DIR=~/pw/models
# ---- Service Ports ----
# Set each port to 0 for auto-assignment, or to a fixed port number.
VLLM_SERVER_PORT=0
PROXY_PORT=0
RAG_PORT=0
CHROMA_PORT=0
# ---- RAG Configuration ----
# Directory containing the documents to index
# NOTE(review): this is a relative path; presumably it is resolved against the
# directory run_local.sh is started from - confirm, or use an absolute path.
DOCS_DIR=./docs
# ---- vLLM Configuration ----
# Extra arguments passed to the vLLM server.
# The value is quoted because it contains spaces.
# NOTE(review): --trust_remote_code is spelled with underscores here while
# --tensor-parallel-size below uses hyphens; confirm the vLLM version in use
# accepts both spellings.
VLLM_EXTRA_ARGS="--dtype bfloat16 --trust_remote_code"
# For multi-GPU setups, use this line instead to add tensor parallelism:
#VLLM_EXTRA_ARGS="--dtype bfloat16 --trust_remote_code --tensor-parallel-size 4"
# ---- Security ----
# API key for the vLLM server (optional).
# To set one, uncomment the line below and replace the placeholder value.
#API_KEY=your-api-key-here
# ---- Build Options ----
# Set to true to build the containers from source instead of using pre-built ones
BUILD=false
# ---- Debug ----
# Uncomment to enable debug output
#DEBUG=1