-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrvc_server.py
More file actions
143 lines (113 loc) · 4.45 KB
/
rvc_server.py
File metadata and controls
143 lines (113 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""
RVC Voice Conversion HTTP Server
Fixed to miku_default_rvc model. Accepts audio input, returns converted WAV.
Usage:
.\venv\Scripts\python.exe rvc_server.py [--port 6242]
API:
POST /convert
- multipart/form-data with field "audio" (wav/mp3/flac file)
- optional query params: pitch (int, default 0), f0method (str, default rmvpe)
- returns: WAV audio (audio/wav)
GET /health
- returns: {"status": "ok", "model": "miku_default_rvc.pth"}
"""
import os
import sys
import io
import tempfile
import logging
import argparse
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
from dotenv import load_dotenv
load_dotenv()
import soundfile as sf
import numpy as np
from fastapi import FastAPI, UploadFile, File, Query
from fastapi.responses import StreamingResponse
import uvicorn
# Log at INFO through the root handler; this module uses its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The server is pinned to one model and the feature index that goes with it.
MODEL_NAME = "miku_default_rvc.pth"
INDEX_PATH = os.path.join(
    "logs",
    "miku_default_rvc",
    "added_IVF4457_Flat_nprobe_1_miku_default_rvc_v2.index",
)

app = FastAPI(title="RVC Voice Conversion Server")

# Both stay None until init_model() assigns them.
vc_instance = config_instance = None
def init_model():
    """Create the RVC config and VC objects and load the fixed model.

    Assigns the module-level ``config_instance`` and ``vc_instance`` and then
    asks the VC object to load ``MODEL_NAME``. The RVC packages are imported
    inside the function rather than at module import time.
    """
    global vc_instance, config_instance

    from configs.config import Config
    from infer.modules.vc.modules import VC

    # Assign each global as soon as its object exists, in the same order as
    # before, so a failure part-way leaves the same state behind.
    config_instance = cfg = Config()
    vc_instance = converter = VC(cfg)

    logger.info(f"Loading model: {MODEL_NAME}")
    converter.get_vc(MODEL_NAME)
    logger.info(f"Model loaded. tgt_sr={vc_instance.tgt_sr}, version={vc_instance.version}")
@app.on_event("startup")
async def startup():
    """Application startup hook: load the model by calling init_model()."""
    init_model()
@app.get("/health")
def health():
    """Return a status payload describing the server and the loaded model.

    ``tgt_sr`` and ``device`` are None when the corresponding module-level
    object has not been set.
    """
    tgt_sr = None
    if vc_instance:
        tgt_sr = vc_instance.tgt_sr

    device = None
    if config_instance:
        device = str(config_instance.device)

    return {"status": "ok", "model": MODEL_NAME, "tgt_sr": tgt_sr, "device": device}
@app.post("/convert")
async def convert(
    audio: UploadFile = File(...),
    pitch: int = Query(0, description="Pitch shift in semitones (12 = octave up, -12 = octave down)"),
    f0method: str = Query("rmvpe", description="F0 extraction method: rmvpe, pm, harvest, crepe"),
    index_rate: float = Query(0.75, description="Index feature ratio (0-1)"),
    filter_radius: int = Query(3, description="Median filter radius for harvest"),
    resample_sr: int = Query(0, description="Resample to this rate (0 = no resample)"),
    rms_mix_rate: float = Query(0.25, description="Volume envelope mix ratio"),
    protect: float = Query(0.33, description="Protect voiceless consonants (0-0.5)"),
):
    """Convert an uploaded audio file with the loaded model and return WAV.

    The upload is written to a temporary file, passed to
    ``vc_instance.vc_single`` together with the query parameters, and the
    resulting samples are encoded as 16-bit PCM WAV.

    Args:
        audio: Uploaded audio file (multipart field "audio").
        pitch: Pitch shift in semitones.
        f0method: F0 extraction method name forwarded to the converter.
        index_rate: Index feature ratio.
        filter_radius: Median filter radius.
        resample_sr: Output resample rate; 0 means no resampling.
        rms_mix_rate: Volume envelope mix ratio.
        protect: Voiceless consonant protection amount.

    Returns:
        A ``StreamingResponse`` with ``audio/wav`` content on success.
        On failure, a JSON body ``{"error": ...}`` with status 503 (model not
        loaded), 400 (empty upload) or 500 (conversion produced no audio).
    """
    # Local import: keeps this handler self-contained with respect to the
    # file's import block.
    from fastapi.responses import JSONResponse

    if vc_instance is None:
        return JSONResponse(status_code=503, content={"error": "Model not loaded"})

    # Read the upload before creating any temp file, so a failed read cannot
    # leave a file behind.
    content = await audio.read()
    if not content:
        return JSONResponse(status_code=400, content={"error": "Empty audio upload"})

    suffix = os.path.splitext(audio.filename or "input.wav")[1] or ".wav"
    tmp_path = None
    try:
        # delete=False and closing the handle before conversion lets the
        # converter reopen the file by path.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp_path = tmp.name
            tmp.write(content)

        # NOTE(review): vc_single is called synchronously from this coroutine,
        # so other requests are presumably not served while it runs - confirm
        # whether that serialization is relied upon before moving it to a
        # worker thread.
        info, result = vc_instance.vc_single(
            sid=0,
            input_audio_path=tmp_path,
            f0_up_key=pitch,
            f0_file=None,
            f0_method=f0method,
            file_index=INDEX_PATH if os.path.exists(INDEX_PATH) else "",
            file_index2="",
            index_rate=index_rate,
            filter_radius=filter_radius,
            resample_sr=resample_sr,
            rms_mix_rate=rms_mix_rate,
            protect=protect,
        )

        tgt_sr, audio_opt = result if result is not None else (None, None)
        if audio_opt is None:
            logger.error("Conversion failed for %s: %s", audio.filename, info)
            return JSONResponse(status_code=500, content={"error": info})

        buf = io.BytesIO()
        sf.write(buf, audio_opt, tgt_sr, format="WAV", subtype="PCM_16")
        payload = buf.getvalue()

        logger.info(
            "Converted: %s -> %sHz, %d samples. %s",
            audio.filename, tgt_sr, len(audio_opt), info,
        )

        # Header values must be single-line and latin-1 encodable; replace
        # anything else so a successful conversion is not lost to a header
        # encoding error.
        header_info = str(info).replace("\r", "").replace("\n", " | ")
        header_info = header_info.encode("latin-1", "replace").decode("latin-1")

        # Send the WAV as one chunk; iterating the BytesIO directly would
        # yield arbitrary newline-delimited fragments of binary data.
        return StreamingResponse(
            iter((payload,)),
            media_type="audio/wav",
            headers={
                "Content-Disposition": "attachment; filename=converted.wav",
                "X-RVC-Info": header_info,
            },
        )
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: report the failure but do not fail the request.
                logger.warning("Could not remove temp file %s", tmp_path, exc_info=True)
def _serve_from_cli():
    """Parse the --port/--host options and run the app under uvicorn."""
    cli = argparse.ArgumentParser(description="RVC Voice Conversion Server")
    cli.add_argument("--port", type=int, default=6242, help="Server port (default: 6242)")
    cli.add_argument("--host", type=str, default="0.0.0.0", help="Bind host")
    # parse_known_args: unrecognized arguments are returned separately, not rejected.
    args, _unrecognized = cli.parse_known_args()

    # Clear sys.argv so RVC's internal Config argparse doesn't choke
    sys.argv = [sys.argv[0]]

    logger.info(f"Starting RVC server on {args.host}:{args.port}")
    uvicorn.run(app, host=args.host, port=args.port)


if __name__ == "__main__":
    _serve_from_cli()