activate-rag-vllm/local.env.example at main · parallelworks/activate-rag-vllm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# ==============================================================================
# Local Development Configuration for activate-rag-vllm
# ==============================================================================
# Copy this file and customize for your environment:
#   cp local.env.example my-config.env
#   ./run_local.sh --config my-config.env
# ==============================================================================

# ---- Runtime Selection ----
# Container runtime to use. Options: auto, singularity, docker
# 'auto' prefers Singularity when it is available
# (presumably falling back to Docker otherwise — confirm in run_local.sh).
RUNMODE=auto

# ---- Deployment Type ----
# Which services to deploy. Options:
#   all  - vLLM + RAG
#   vllm - vLLM only
RUNTYPE=all

# ---- Model Configuration ----
# Where the model weights come from:
#   local       - use pre-downloaded weights at MODEL_PATH
#   huggingface - download the model identified by HF_MODEL_ID
MODEL_SOURCE=local

# For local models: full path to the model weights directory
MODEL_PATH=/models/Llama-3.1-8B-Instruct

# For HuggingFace models: model ID and optional access token.
# To switch, comment out MODEL_SOURCE=local above and uncomment these lines.
# Keep real tokens out of version control.
#MODEL_SOURCE=huggingface
#HF_MODEL_ID=meta-llama/Llama-3.1-8B-Instruct
#HF_TOKEN=hf_xxxxxxxxxxxxx

# Where to cache downloaded models
# NOTE(review): a leading `~` is expanded only when a shell sources this file;
# confirm how run_local.sh loads it, or use an absolute path.
MODEL_CACHE_DIR=~/pw/models

# ---- Service Ports ----
# One port setting per service. Use 0 to have the port auto-assigned,
# or set a fixed port number.
VLLM_SERVER_PORT=0
PROXY_PORT=0
RAG_PORT=0
CHROMA_PORT=0

# ---- RAG Configuration ----
# Directory containing the documents to index
# NOTE(review): relative path — presumably resolved against the directory
# run_local.sh is started from; confirm, or use an absolute path.
DOCS_DIR=./docs

# ---- vLLM Configuration ----
# Extra arguments passed to the vLLM server.
# Flags use vLLM's documented dash-separated spelling (e.g. --trust-remote-code).
# Note: --trust-remote-code allows custom code shipped with a model repository
# to be executed; remove it for models that do not need it.
VLLM_EXTRA_ARGS="--dtype bfloat16 --trust-remote-code"

# For multi-GPU setups, add tensor parallelism:
#VLLM_EXTRA_ARGS="--dtype bfloat16 --trust-remote-code --tensor-parallel-size 4"

# ---- Security ----
# API key for vLLM server (optional)
#API_KEY=your-api-key-here

# ---- Build Options ----
# Set to true to build the containers from source instead of using pre-built ones
BUILD=false

# ---- Debug ----
# Enable debug output
#DEBUG=1