diff --git a/koboldcpp/.helmignore b/koboldcpp/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/koboldcpp/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/koboldcpp/Chart.yaml b/koboldcpp/Chart.yaml new file mode 100644 index 000000000..d107f0c51 --- /dev/null +++ b/koboldcpp/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: "1.110" +description: Run AI Models Locally, Free & Open-Source +name: koboldcpp +type: application +version: 1.0.2 \ No newline at end of file diff --git a/koboldcpp/OlaresManifest.yaml b/koboldcpp/OlaresManifest.yaml new file mode 100644 index 000000000..7dbe79b64 --- /dev/null +++ b/koboldcpp/OlaresManifest.yaml @@ -0,0 +1,123 @@ +--- +olaresManifest.version: '0.11.0' +olaresManifest.type: app +apiVersion: 'v2' +metadata: + name: koboldcpp + description: Run AI Models Locally, Free & Open-Source. + icon: https://app.cdn.olares.com/appstore/koboldcpp/icon.png + appid: koboldcpp + version: '1.0.2' + title: KoboldCpp + categories: + - Utilities_v112 + - Productivity +permission: + appData: true + appCache: true + userData: + - Home +spec: + versionName: '1.110' + promoteImage: + - https://app.cdn.olares.com/appstore/koboldcpp/1.webp + - https://app.cdn.olares.com/appstore/koboldcpp/2.webp + - https://app.cdn.olares.com/appstore/koboldcpp/3.webp + fullDescription: | + *KoboldCPP is a powerful, C++ based backend built for running large language models locally using the GGUF format the same format supported by llama.cpp. 
Originally created to power storytelling and role-playing platforms like KoboldAI, it has grown into a complete local LLM engine capable of handling a wide variety of modern models, including:* + - LLaMA, LLaMA 2, and LLaMA 3 + - Mistral and Mixtral + - Phi and Gemma + - Qwen and Yi + - Many other models converted to GGUF + + *Features* + - Single file executable, with no installation required and no external dependencies + - Runs on CPU or GPU, supports full or partial offloading + - LLM text generation (Supports all GGML and GGUF models, backwards compatibility with ALL past models) + - Image Generation and Image Editing (Stable Diffusion 1.5, SDXL, SD3, Flux, Qwen Image, Z-Image, Klein) + - Video Generation (WAN 2.2) + - Speech-To-Text (Voice Recognition) via Whisper + - Text-To-Speech (Voice Generation) via Qwen3TTS, Kokoro, OuteTTS, Parler and Dia + - Music Generation (Ace Step 1.5) + - Image Recognition (Multimodal Vision) + - MCP Server support and tool calling + - Provides many compatible API endpoints for many popular webservices (KoboldCppApi, OpenAiApi, OllamaApi, A1111ForgeApi, ComfyUiApi, WhisperTranscribeApi, XttsApi, OpenAiSpeechApi) + - Bundled KoboldAI Lite UI with editing tools, save formats, memory, world info, author's note, characters, scenarios. + - Includes multiple modes (chat, adventure, instruct, storywriter) and UI Themes (aesthetic roleplay, classic writer, corporate assistant, messenger) + - Supports loading Tavern Character Cards, importing many different data formats from various sites, reading or exporting JSON savefiles and persistent stories. + - Many other features including new samplers, regex support, websearch, RAG via TextDB, image recognition/vision and more. + - Ready-to-use binaries for Windows, MacOS, Linux. Runs directly with Colab, Docker, also supports other platforms if self-compiled (like Android (via Termux) and Raspberry PI). 
+ developer: KoboldAI + website: https://github.com/LostRuins/koboldcpp + submitter: Olares + locale: + - en-US + - zh-CN + doc: https://github.com/LostRuins/koboldcpp/wiki + {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} + requiredMemory: 4Gi + limitedMemory: 8Gi + requiredDisk: 5Gi + limitedDisk: 50Gi + requiredCpu: 2.1 + limitedCpu: 6 + requiredGpu: 10Gi + limitedGpu: 24Gi + {{- else }} + requiredMemory: 64Mi + limitedMemory: 500Mi + requiredDisk: 50Mi + limitedDisk: 200Mi + requiredCpu: 10m + limitedCpu: 500m + {{- end }} + supportArch: + - amd64 + subCharts: + - name: koboldcppserver + shared: true + - name: koboldcpp +options: + apiTimeout: 0 + dependencies: + - name: olares + type: system + version: '>=1.12.3-0' + {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} + {{- else }} + - name: koboldcpp + type: application + version: '>=1.0.0' + mandatory: true + {{- end }} + appScope: + {{- if and .Values.admin .Values.bfl.username (eq .Values.admin .Values.bfl.username) }} + clusterScoped: true + appRef: + - koboldcpp + {{- else }} + clusterScoped: false + {{- end }} +sharedEntrances: + - name: koboldcpp + host: sharedentrances-koboldcpp + port: 0 + title: KoboldCpp API + icon: https://app.cdn.olares.com/appstore/koboldcpp/icon.png + invisible: true + authLevel: internal +entrances: + - authLevel: internal + host: koboldcpp-web-svc + icon: https://app.cdn.olares.com/appstore/koboldcpp/icon.png + name: koboldcpp + openMethod: window + port: 8080 + title: KoboldCpp +envs: + - envName: OLARES_USER_HUGGINGFACE_SERVICE + required: true + applyOnChange: true + valueFrom: + envName: OLARES_USER_HUGGINGFACE_SERVICE diff --git a/koboldcpp/i18n/en-US/OlaresManifest.yaml b/koboldcpp/i18n/en-US/OlaresManifest.yaml new file mode 100644 index 000000000..57b49dd79 --- /dev/null +++ b/koboldcpp/i18n/en-US/OlaresManifest.yaml @@ -0,0 +1,29 @@ +metadata: + title: KoboldCpp + description: Run AI Models 
Locally, Free & Open-Source. +spec: + fullDescription: | + *KoboldCPP is a powerful, C++ based backend built for running large language models locally using the GGUF format, the same format supported by llama.cpp. Originally created to power storytelling and role-playing platforms like KoboldAI, it has grown into a complete local LLM engine capable of handling a wide variety of modern models, including:* + - LLaMA, LLaMA 2, and LLaMA 3 + - Mistral and Mixtral + - Phi and Gemma + - Qwen and Yi + - Many other models converted to GGUF + + *Features* + - Single file executable, with no installation required and no external dependencies + - Runs on CPU or GPU, supports full or partial offloading + - LLM text generation (Supports all GGML and GGUF models, backwards compatibility with ALL past models) + - Image Generation and Image Editing (Stable Diffusion 1.5, SDXL, SD3, Flux, Qwen Image, Z-Image, Klein) + - Video Generation (WAN 2.2) + - Speech-To-Text (Voice Recognition) via Whisper + - Text-To-Speech (Voice Generation) via Qwen3TTS, Kokoro, OuteTTS, Parler and Dia + - Music Generation (Ace Step 1.5) + - Image Recognition (Multimodal Vision) + - MCP Server support and tool calling + - Provides many compatible API endpoints for many popular webservices (KoboldCppApi, OpenAiApi, OllamaApi, A1111ForgeApi, ComfyUiApi, WhisperTranscribeApi, XttsApi, OpenAiSpeechApi) + - Bundled KoboldAI Lite UI with editing tools, save formats, memory, world info, author's note, characters, scenarios. + - Includes multiple modes (chat, adventure, instruct, storywriter) and UI Themes (aesthetic roleplay, classic writer, corporate assistant, messenger) + - Supports loading Tavern Character Cards, importing many different data formats from various sites, reading or exporting JSON savefiles and persistent stories. + - Many other features including new samplers, regex support, websearch, RAG via TextDB, image recognition/vision and more. + - Ready-to-use binaries for Windows, MacOS, Linux. 
Runs directly with Colab, Docker, also supports other platforms if self-compiled (like Android (via Termux) and Raspberry PI). diff --git a/koboldcpp/i18n/zh-CN/OlaresManifest.yaml b/koboldcpp/i18n/zh-CN/OlaresManifest.yaml new file mode 100644 index 000000000..9079a0fc4 --- /dev/null +++ b/koboldcpp/i18n/zh-CN/OlaresManifest.yaml @@ -0,0 +1,29 @@ +metadata: + title: KoboldCpp + description: 本地运行 AI 模型,免费且开源。 +spec: + fullDescription: | + *KoboldCPP 是一个强大的基于 C++ 的后端,专为本地运行大型语言模型而设计,采用与 llama.cpp 相同支持的 GGUF 格式。最初为像 KoboldAI 这样的故事创作和角色扮演平台打造,如今已发展为一款功能全面的本地大模型引擎,能够支持多种现代模型,包括:* + - LLaMA、LLaMA 2 和 LLaMA 3 + - Mistral 和 Mixtral + - Phi 和 Gemma + - Qwen 和 Yi + - 以及许多其他已转换为 GGUF 格式的模型 + + *功能特色* + - 单文件可执行程序,无需安装,无需任何外部依赖 + - 支持 CPU 或 GPU 运行,可全量或部分转存 + - LLM 文本生成(支持所有 GGML 和 GGUF 模型,同时向下兼容所有旧模型) + - 图像生成与编辑(支持 Stable Diffusion 1.5、SDXL、SD3、Flux、Qwen Image、Z-Image、Klein) + - 视频生成(WAN 2.2) + - 语音转文字(Whisper 语音识别) + - 文字转语音(通过 Qwen3TTS、Kokoro、OuteTTS、Parler 和 Dia 语音生成) + - 音乐生成(Ace Step 1.5) + - 图像识别(多模态视觉) + - 支持 MCP 服务器及工具调用 + - 提供众多流行 Web 服务兼容的 API 端点(KoboldCppApi、OpenAiApi、OllamaApi、A1111ForgeApi、ComfyUiApi、WhisperTranscribeApi、XttsApi、OpenAiSpeechApi) + - 捆绑 KoboldAI Lite UI,内置编辑工具、存档格式、记忆功能、世界信息、作者笔记、角色、场景管理等 + - 包括多种模式(聊天、冒险、指令、故事创作)和多种 UI 主题(美学角色扮演、经典作家、商务助手、信息使者) + - 支持加载 Tavern 角色卡,导入各类站点数据格式,读取/导出 JSON 存档与持久化故事 + - 还包含新采样器、正则表达式支持、网页搜索、基于 TextDB 的 RAG、图像识别/视觉等众多功能 + - 提供适用于 Windows、MacOS、Linux 的即用型二进制文件;支持 Colab、Docker 直接运行,自行编译还支持其他平台(如 Android(Termux)和树莓派) diff --git a/koboldcpp/koboldcpp/Chart.yaml b/koboldcpp/koboldcpp/Chart.yaml new file mode 100644 index 000000000..8b6cf1668 --- /dev/null +++ b/koboldcpp/koboldcpp/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: "latest" +description: description +name: koboldcpp +type: application +version: 1.0.0 diff --git a/koboldcpp/koboldcpp/templates/clientproxy.yaml b/koboldcpp/koboldcpp/templates/clientproxy.yaml new file mode 100644 index 000000000..bd10193dc --- /dev/null +++ 
b/koboldcpp/koboldcpp/templates/clientproxy.yaml @@ -0,0 +1,120 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-config + namespace: {{ .Release.Namespace }} +data: + nginx.conf: | + upstream app_backend { + server koboldcpp-svc.koboldcppserver-shared:5001 max_fails=1 fail_timeout=2s; + server download-svc.koboldcppserver-shared:8090 backup; + } + + server { + listen 8080; + access_log /opt/bitnami/openresty/nginx/logs/access.log; + error_log /opt/bitnami/openresty/nginx/logs/error.log; + + client_max_body_size 200m; + + location / { + proxy_pass http://app_backend; + proxy_next_upstream error timeout http_500 http_502 http_503 http_504; + proxy_next_upstream_tries 2; + proxy_connect_timeout 2s; + proxy_read_timeout 600s; + proxy_send_timeout 600s; + + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + client_max_body_size 200m; + } + } + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + io.kompose.service: koboldcppweb +spec: + replicas: 1 + selector: + matchLabels: + io.kompose.service: koboldcppweb + strategy: + type: Recreate + template: + metadata: + labels: + io.kompose.service: koboldcppweb + spec: + volumes: + - name: nginx-config + configMap: + name: nginx-config + defaultMode: 438 + items: + - key: nginx.conf + path: nginx.conf + containers: + - name: nginx + image: docker.io/beclab/aboveos-bitnami-openresty:1.25.3-2 + ports: + - containerPort: 8080 + protocol: TCP + env: + - name: OPENRESTY_CONF_FILE + value: /etc/nginx/nginx.conf + readinessProbe: + exec: + command: + - /bin/sh + - -c + - | + http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080) + [ $http_code -ge 200 ] && [ $http_code -lt 500 ] 
+ initialDelaySeconds: 2 + timeoutSeconds: 3 + periodSeconds: 3 + successThreshold: 1 + failureThreshold: 60 + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 10m + memory: 64Mi + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-config + mountPath: /opt/bitnami/openresty/nginx/conf/server_blocks/nginx.conf + subPath: nginx.conf + +--- +apiVersion: v1 +kind: Service +metadata: + name: koboldcpp-web-svc + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + io.kompose.service: koboldcppweb + ports: + - name: http + protocol: TCP + port: 8080 + targetPort: 8080 diff --git a/koboldcpp/koboldcpp/values.yaml b/koboldcpp/koboldcpp/values.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/koboldcpp/koboldcppserver/Chart.yaml b/koboldcpp/koboldcppserver/Chart.yaml new file mode 100644 index 000000000..6d7af6cde --- /dev/null +++ b/koboldcpp/koboldcppserver/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: "latest" +description: description +name: koboldcppserver +type: application +version: 1.0.0 diff --git a/koboldcpp/koboldcppserver/templates/configmap.yaml b/koboldcpp/koboldcppserver/templates/configmap.yaml new file mode 100644 index 000000000..437eeb78c --- /dev/null +++ b/koboldcpp/koboldcppserver/templates/configmap.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: koboldcpp-env + namespace: {{ .Release.Namespace }} +data: + NVIDIA_VISIBLE_DEVICES: "all" + NVIDIA_DRIVER_CAPABILITIES: "compute,utility" + KCPP_MODEL: "" + KCPP_ARGS: "--model /models/Qwen3.5-4B-UD-Q4_K_XL.gguf --contextsize 4096 --usecuda --gpulayers 99 --mmproj /models/mmproj-F32.gguf --sdmodel /models/picX_real.safetensors --sdquant 1 --whispermodel /models/ggml-large-v3-turbo.bin --ttsmodel /models/Qwen3-TTS-12Hz-1.7B-Base-q8_0.gguf --ttswavtokenizer /models/qwen3-tts-tokenizer-q8_0.gguf --ttsgpu --admin --admindir /models/admindir" + KCPP_DONT_UPDATE: 
"true" + KCPP_DONT_TUNNEL: "true" + KCPP_DONT_REMOVE_MODELS: "true" diff --git a/koboldcpp/koboldcppserver/templates/deployment.yaml b/koboldcpp/koboldcppserver/templates/deployment.yaml new file mode 100644 index 000000000..557cbc0b8 --- /dev/null +++ b/koboldcpp/koboldcppserver/templates/deployment.yaml @@ -0,0 +1,143 @@ + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: koboldcpp-engine + namespace: {{ .Release.Namespace }} + labels: + app: koboldcpp-engine + annotations: + applications.app.bytetrade.io/gpu-inject: "true" +spec: + replicas: 1 + selector: + matchLabels: + app: koboldcpp-engine + strategy: + type: Recreate + template: + metadata: + labels: + app: koboldcpp-engine + spec: + initContainers: + - name: init-wait-models + securityContext: + runAsUser: 0 + runAsGroup: 0 + image: docker.io/beclab/aboveos-busybox:1.37.0 + command: + - sh + - -c + - | + DONE_FILES=".qwen35-4b.done .mmproj-qwen35.done .picx-real.done .whisper.done .tts-model.done .tts-tokenizer.done" + echo "[init] Waiting for 6 model downloads..." + for f in $DONE_FILES; do + while [ ! -f "/models/$f" ]; do + sleep 10 + done + echo "[init] $f OK" + done + echo "[init] All models ready." + mkdir -p /models/admindir + + if [ ! 
-f /models/admindir/qwen3.5-4b.kcpps ]; then + echo '{"model_param":"/models/Qwen3.5-4B-UD-Q4_K_XL.gguf","port":5001,"port_param":5001,"host":"","launch":false,"threads":4,"contextsize":4096,"gpulayers":99,"usecublas":["normal","0"],"multiuser":true,"skiplauncher":true}' > /models/admindir/qwen3.5-4b.kcpps + echo "[init] Created qwen3.5-4b.kcpps" + fi + volumeMounts: + - name: model-data + mountPath: /models + containers: + - name: koboldcpp-engine + image: docker.io/beclab/harveyff-koboldcpp:1.110 + envFrom: + - configMapRef: + name: koboldcpp-env + resources: + limits: + cpu: 4 + memory: 8Gi + nvidia.com/gpu: 1 + requests: + cpu: 2 + memory: 4Gi + nvidia.com/gpu: 1 + ports: + - containerPort: 5001 + protocol: TCP + volumeMounts: + - name: model-data + mountPath: /models + - name: koboldcpp-data + mountPath: /app/data + - name: koboldcpp-workspace + mountPath: /workspace + startupProbe: + tcpSocket: + port: 5001 + periodSeconds: 15 + timeoutSeconds: 10 + failureThreshold: 80 + livenessProbe: + tcpSocket: + port: 5001 + periodSeconds: 60 + timeoutSeconds: 10 + failureThreshold: 6 + readinessProbe: + tcpSocket: + port: 5001 + periodSeconds: 20 + timeoutSeconds: 10 + failureThreshold: 6 + + restartPolicy: Always + volumes: + - name: model-data + hostPath: + path: "{{ .Values.userspace.userData }}/Huggingface/koboldcpp" + type: DirectoryOrCreate + - name: koboldcpp-data + hostPath: + type: DirectoryOrCreate + path: '{{ .Values.userspace.appData }}' + - name: koboldcpp-workspace + hostPath: + path: "{{ .Values.userspace.userData }}/Huggingface/koboldcpp/workspace" + type: DirectoryOrCreate + +--- +apiVersion: v1 +kind: Service +metadata: + name: koboldcpp-svc + namespace: {{ .Release.Namespace }} + labels: + app: koboldcpp-engine +spec: + ports: + - name: api + port: 5001 + targetPort: 5001 + selector: + app: koboldcpp-engine + type: ClusterIP + +--- +apiVersion: v1 +kind: Service +metadata: + name: sharedentrances-koboldcpp + namespace: {{ .Release.Namespace }} + 
labels: + app: koboldcpp-engine +spec: + ports: + - name: "5001" + port: 80 + targetPort: 5001 + selector: + app: koboldcpp-engine + type: ClusterIP diff --git a/koboldcpp/koboldcppserver/templates/download.yaml b/koboldcpp/koboldcppserver/templates/download.yaml new file mode 100644 index 000000000..a0c227a52 --- /dev/null +++ b/koboldcpp/koboldcppserver/templates/download.yaml @@ -0,0 +1,65 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Release.Name }}-download-models + namespace: {{ .Release.Namespace }} + labels: + app: {{ .Release.Name }}-download-models +spec: + ttlSecondsAfterFinished: 100 + backoffLimit: 3 + template: + metadata: + labels: + app: {{ .Release.Name }}-download-models + spec: + restartPolicy: OnFailure + containers: + - name: download-models + image: "docker.io/harveyff/hf-downloader-only:v0.0.7-amd" + env: + - name: HF_ENDPOINT + value: "{{ .Values.olaresEnv.OLARES_USER_HUGGINGFACE_SERVICE }}" + args: + - "--tasks" + - '{"tasks":[{"repo":"unsloth/Qwen3.5-4B-GGUF","file":"Qwen3.5-4B-UD-Q4_K_XL.gguf","ref":"main","outDir":"/data/models","doneNameTpl":".qwen35-4b.done"},{"repo":"unsloth/Qwen3.5-4B-GGUF","file":"mmproj-F32.gguf","ref":"main","outDir":"/data/models","doneNameTpl":".mmproj-qwen35.done"},{"repo":"fp16-guy/PicX_real","file":"picX_real.safetensors","ref":"main","outDir":"/data/models","doneNameTpl":".picx-real.done"},{"repo":"ggerganov/whisper.cpp","file":"ggml-large-v3-turbo.bin","ref":"main","outDir":"/data/models","doneNameTpl":".whisper.done"},{"repo":"koboldcpp/tts","file":"Qwen3-TTS-12Hz-1.7B-Base-q8_0.gguf","ref":"main","outDir":"/data/models","doneNameTpl":".tts-model.done"},{"repo":"koboldcpp/tts","file":"qwen3-tts-tokenizer-q8_0.gguf","ref":"main","outDir":"/data/models","doneNameTpl":".tts-tokenizer.done"}]}' + - "--static" + - "/app/static" + - "--port" + - "8090" + - "--probe-url" + - "/api/v1/info/version" + ports: + - containerPort: 8090 + name: http + volumeMounts: + - name: models + mountPath: 
/data/models + resources: + requests: + cpu: 100m + memory: 200Mi + limits: + cpu: "1" + memory: 1Gi + volumes: + - name: models + hostPath: + path: "{{ .Values.userspace.userData }}/Huggingface/koboldcpp" + type: DirectoryOrCreate + +--- +apiVersion: v1 +kind: Service +metadata: + name: download-svc + namespace: {{ .Release.Namespace }} +spec: + selector: + app: {{ .Release.Name }}-download-models + ports: + - name: download-status + port: 8090 + targetPort: 8090 + type: ClusterIP diff --git a/koboldcpp/koboldcppserver/values.yaml b/koboldcpp/koboldcppserver/values.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/koboldcpp/owners b/koboldcpp/owners new file mode 100644 index 000000000..d8e1be557 --- /dev/null +++ b/koboldcpp/owners @@ -0,0 +1,8 @@ +owners: +- 'LittleLollipop' +- 'TShentu' +- 'hysyeah' +- 'pengpeng' +- 'harveyff' +- 'zdf-org' +- 'lovehunter9' diff --git a/koboldcpp/templates/keep b/koboldcpp/templates/keep new file mode 100644 index 000000000..e69de29bb diff --git a/koboldcpp/values.yaml b/koboldcpp/values.yaml new file mode 100644 index 000000000..e69de29bb