-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml
More file actions
117 lines (100 loc) · 3.58 KB
/
config.yaml
File metadata and controls
117 lines (100 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# ============================================================
# 全球化内容供应商 - 配置文件
# 更新日期: 2026-01-23
# ============================================================
# Application identity and global debug switch.
app:
  name: Video Localization Platform
  # Quoted so the version is always read as a string, never as a number.
  version: '1.0.0'
  debug: false
# -------------------- Path configuration --------------------
# Directories used by the application; every entry is a relative path.
# NOTE(review): presumably resolved against the process working directory;
# confirm against the loader.
paths:
  data_dir: ./data
  input_dir: ./data/input
  output_dir: ./data/output
  temp_dir: ./data/temp
  models_dir: ./models
  logs_dir: ./logs
# -------------------- AI model configuration --------------------
# Settings for the demucs, whisperx and tts/xtts models, plus audio parameters.
# NOTE(review): the indentation of this section is missing in this copy. As
# written, every key below parses as a top-level sibling, so the repeated
# `device`, `model_name` and `estimated_memory` keys are duplicates. Restore
# the nesting from the authoritative file; whether `xtts` and `audio` belong
# under `tts` or directly under `models` cannot be determined from here.
models:
# Demucs model settings.
demucs:
model_name: htdemucs
device: cuda
two_stems: vocals
# WhisperX model settings.
whisperx:
model_size: large-v2
device: cuda
compute_type: float16
# NOTE(review): presumably the language code of the source audio; confirm.
language: zh
batch_size: 16
# Text-to-speech settings.
tts:
engine: xtts # Coqui TTS XTTS v2
device: cuda
speed: 1.0
estimated_memory: 4.0 # GB
# Settings specific to the XTTS engine.
xtts:
model_name: tts_models/multilingual/multi-dataset/xtts_v2
estimated_memory: 4.0 # GB
# Audio parameters; sample_rate is presumably in Hz (TODO confirm).
audio:
sample_rate: 16000
channels: 1
format: wav
# -------------------- Task configuration --------------------
# Concurrency limit and temporary/intermediate file handling for tasks.
task:
  max_concurrent_tasks: 3
  cleanup_temp_files: true
  save_intermediate_files: false
# -------------------- Audio mixing configuration --------------------
# Levels for the background-music (bgm) and dubbed (dub) tracks.
# NOTE(review): the volumes look like linear gain factors; confirm against
# the mixer implementation.
audio_mix:
  bgm_volume: 0.3
  dub_volume: 1.0
  enable_ducking: true
# -------------------- Subtitle configuration --------------------
# Limits on subtitle line length and segment duration, and the output format.
# NOTE(review): max_duration_per_segment is presumably in seconds; confirm.
subtitle:
  max_chars_per_line: 40
  max_duration_per_segment: 7.0
  format: srt
# -------------------- Logging configuration --------------------
# Log level, line format, and file rotation/retention policy.
# NOTE(review): the markup tags and {time:...} fields in `format` look like
# loguru syntax; confirm against the logging setup code.
log:
  level: INFO
  # Single-quoted so the braces, colons and angle brackets stay literal.
  format: '<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{file.path}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>'
  rotation: 500 MB
  retention: 7 days
  colorize: true
# -------------------- FFmpeg configuration --------------------
ffmpeg:
  # Base path configuration (points at the bundled Windows executables).
  ffmpeg_path: tools/ffmpeg/ffmpeg-8.0.1-essentials_build/bin/ffmpeg.exe
  ffprobe_path: tools/ffmpeg/ffmpeg-8.0.1-essentials_build/bin/ffprobe.exe

  # GPU acceleration (NVIDIA NVENC/NVDEC).
  enable_gpu: true  # enable GPU acceleration (support is auto-detected)
  gpu_device: 0  # ID of the GPU device to use

  # Video encoder settings.
  video_encoder: h264_nvenc  # GPU encoder: h264_nvenc, hevc_nvenc, av1_nvenc
  video_decoder: h264_cuvid  # GPU decoder: h264_cuvid, hevc_cuvid, vp9_cuvid
  encoder_preset: p4  # preset: p1 (fastest) - p7 (slowest / best quality); p4 recommended
  encoder_cq: 18  # constant quality: 0-51, lower means higher quality; 18-23 recommended

  # Audio settings.
  audio_encoder: aac  # audio encoder
  audio_bitrate: 192k  # audio bitrate
# -------------------- API密钥 (在 .env 中配置) --------------------
#
# 以下配置项从 .env 文件读取,不要在此文件中设置敏感信息:
#
# 【必需】Dify API 密钥:
# DIFY__BASE_URL=https://api.dify.ai/v1
# DIFY__SUBTITLE_CLEAN_API_KEY=app-xxx
# DIFY__TRANSLATION_API_KEY=app-xxx
#
# 【必需】HuggingFace 令牌 (用于 pyannote 说话人分离):
# HUGGINGFACE_TOKEN=hf_xxx
#
# 【可选】GPU 配置 (调度器会自动检测,通常无需设置):
# CUDA_VISIBLE_DEVICES=0,1 # 仅当需要限制使用特定GPU时设置
# -------------------- GPU scheduler configuration --------------------
gpu_scheduler:
  max_workers: 0  # max concurrent threads (0 = decided automatically from GPU memory)
  model_cache_timeout: 5  # model cache timeout (seconds)
  model_cache_cleanup_interval: 10  # cache cleanup interval (seconds)
  preload_enabled: true  # enable model preloading
  preload_window: 3  # preload window size
  preload_safety_margin: 0.15  # preload safety margin (15% of GPU memory)