Skip to content

Commit 93e5706

Browse files
authored
Merge pull request #147 from NianJiuZst/feat/sdk-audio-save
feat: add audio file save support to SpeechSDK and MusicSDK
2 parents 75cb6c4 + ac72641 commit 93e5706

4 files changed

Lines changed: 202 additions & 0 deletions

File tree

src/sdk/music/index.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
2+
import { resolve, dirname } from 'node:path';
13
import { Client } from "../client";
24
import { musicEndpoint } from "../../client/endpoints";
35
import { MusicRequest, MusicResponse } from "../../types/api";
@@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes";
79
import { toMerged } from "es-toolkit/object";
810
import { musicGenerateModel } from "../../commands/music/models";
911

12+
/**
 * Decode a hex-encoded audio payload into raw bytes.
 *
 * The string is validated up front because `Buffer.from(str, 'hex')` does not
 * throw on malformed input; it may silently truncate the result instead.
 *
 * @param hex - Hex string taken from the API response.
 * @returns A Buffer containing the decoded bytes.
 * @throws SDKError with `ExitCode.GENERAL` when `hex` contains a non-hex
 *   character, or when its length is odd.
 */
function hexToBuffer(hex: string): Buffer {
  const containsOnlyHexDigits = /^[0-9a-fA-F]*$/.test(hex);
  if (!containsOnlyHexDigits) {
    throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL);
  }

  // Two hex digits encode one byte, so an odd digit count means a cut-off payload.
  const hasDanglingNibble = hex.length % 2 !== 0;
  if (hasDanglingNibble) {
    throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL);
  }

  return Buffer.from(hex, 'hex');
}
21+
22+
/**
 * Build a timestamped default file name of the form
 * `<prefix>_YYYY-MM-DD-HH-MM-SS.<ext>`.
 *
 * The timestamp comes from `Date.prototype.toISOString()`, so it is in UTC
 * and has one-second resolution.
 *
 * @param prefix - Leading part of the file name (e.g. `"music"`).
 * @param ext - File extension without the leading dot.
 * @returns The generated file name (no directory component).
 */
function defaultFilename(prefix: string, ext: string): string {
  // "2024-01-02T03:04:05.678Z" -> "2024-01-02T03:04:05"
  const isoToSeconds = new Date().toISOString().slice(0, 19);
  // Swap the "T" and ":" separators for "-" to get a single dash-delimited stamp.
  const stamp = isoToSeconds.replace(/[T:]/g, '-');
  return prefix + '_' + stamp + '.' + ext;
}
26+
1027
export interface MusicGenerateRequest extends MusicRequest {
1128
/** Vocal style, e.g. "warm male baritone", "bright female soprano", "duet with harmonies" */
1229
vocals?: string;
@@ -81,6 +98,38 @@ export class MusicSDK extends Client {
8198
});
8299
}
83100

101+
/**
 * Save generated music audio to a file. Decodes the hex-encoded audio
 * from the API response and writes it to disk, creating intermediate
 * directories as needed.
 *
 * The payload is validated and decoded before anything is created on disk,
 * so a malformed response never leaves empty directories behind.
 *
 * @param response - The response from `generate()`.
 * @param outPath - Target file path. When omitted (or empty), defaults to
 *   `music_<timestamp>.<ext>`; a relative path is resolved with `path.resolve`.
 * @param ext - Extension used for the default file name only (default:
 *   `"mp3"`). It is not applied when `outPath` is given.
 * @returns The absolute path of the saved file.
 * @throws SDKError when the response has no audio, the audio is not valid
 *   hex, or the disk is full (`ENOSPC`). Other filesystem errors are
 *   rethrown unchanged.
 */
save(response: MusicResponse, outPath?: string, ext = 'mp3'): string {
  const audioHex = response.data.audio;
  if (!audioHex) {
    throw new SDKError('API response missing audio data.', ExitCode.GENERAL);
  }

  // Decode first: if the payload is malformed we fail here, before any
  // directory has been created.
  const audio = hexToBuffer(audioHex);

  const dest = resolve(outPath || defaultFilename('music', ext));
  const dir = dirname(dest);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  try {
    writeFileSync(dest, audio);
  } catch (err) {
    // Translate "no space left on device" into a friendlier SDK error.
    if ((err as NodeJS.ErrnoException).code === 'ENOSPC') {
      throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL);
    }
    throw err;
  }

  return dest;
}
132+
84133
private buildPrompt(request: ModelPartial<MusicGenerateRequest>) {
85134
const structuredParts: string[] = [];
86135
if (request.vocals) structuredParts.push(`Vocals: ${request.vocals as string}`);

src/sdk/speech/index.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
2+
import { resolve, dirname } from 'node:path';
13
import { Client } from "../client";
24
import { speechEndpoint, voicesEndpoint } from "../../client/endpoints";
35
import { SpeechRequest, SpeechResponse, VoiceListResponse } from "../../types/api";
@@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes";
79
import { toMerged } from "es-toolkit/object";
810
import { ModelPartial } from "../types";
911

12+
/**
 * Turn the hex string returned by the speech API into a byte buffer.
 *
 * Input is checked before decoding because `Buffer.from(str, 'hex')` does
 * not reject malformed input; it may quietly truncate the output instead.
 *
 * @param hex - Hex-encoded audio from the API response.
 * @returns The decoded bytes.
 * @throws SDKError with `ExitCode.GENERAL` if `hex` has a character outside
 *   `0-9a-fA-F`, or if it has an odd number of characters.
 */
function hexToBuffer(hex: string): Buffer {
  const hexDigitsOnly = /^[0-9a-fA-F]*$/;
  if (hexDigitsOnly.test(hex) === false) {
    throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL);
  }

  // Each byte needs exactly two digits; a leftover digit means truncation.
  const leftoverDigits = hex.length % 2;
  if (leftoverDigits !== 0) {
    throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL);
  }

  return Buffer.from(hex, 'hex');
}
21+
22+
/**
 * Produce a default output file name: `<prefix>_YYYY-MM-DD-HH-MM-SS.<ext>`.
 *
 * The time portion is derived from `toISOString()`, which means UTC and
 * whole-second precision.
 *
 * @param prefix - Name prefix placed before the timestamp (e.g. `"speech"`).
 * @param ext - Extension to append, without a leading dot.
 * @returns The file name only; no directory is prepended.
 */
function defaultFilename(prefix: string, ext: string): string {
  // Keep "YYYY-MM-DDTHH:MM:SS" and drop the milliseconds and "Z" suffix.
  const secondsPrecision = new Date().toISOString().slice(0, 19);
  // Use dashes everywhere so the name contains no ":" or "T".
  const dashed = secondsPrecision.replace(/[T:]/g, '-');
  return [prefix, '_', dashed, '.', ext].join('');
}
26+
1027
export class SpeechSDK extends Client {
1128
async synthesize(request: ModelPartial<SpeechRequest> & { stream: true }): Promise<AsyncGenerator<SpeechResponse>>;
1229
async synthesize(request: ModelPartial<SpeechRequest>): Promise<SpeechResponse>;
@@ -56,6 +73,38 @@ export class SpeechSDK extends Client {
5673
return voices;
5774
}
5875

76+
/**
 * Save synthesized speech audio to a file. Decodes the hex-encoded audio
 * from the API response and writes it to disk, creating intermediate
 * directories as needed.
 *
 * The payload is validated and decoded before anything is created on disk,
 * so a malformed response never leaves empty directories behind.
 *
 * @param response - The response from `synthesize()`.
 * @param outPath - Target file path. When omitted (or empty), defaults to
 *   `speech_<timestamp>.<ext>`; a relative path is resolved with `path.resolve`.
 * @param ext - Extension used for the default file name only (default:
 *   `"mp3"`). It is not applied when `outPath` is given.
 * @returns The absolute path of the saved file.
 * @throws SDKError when the response has no audio, the audio is not valid
 *   hex, or the disk is full (`ENOSPC`). Other filesystem errors are
 *   rethrown unchanged.
 */
save(response: SpeechResponse, outPath?: string, ext = 'mp3'): string {
  const audioHex = response.data.audio;
  if (!audioHex) {
    throw new SDKError('API response missing audio data.', ExitCode.GENERAL);
  }

  // Decode first: if the payload is malformed we fail here, before any
  // directory has been created.
  const audio = hexToBuffer(audioHex);

  const dest = resolve(outPath || defaultFilename('speech', ext));
  const dir = dirname(dest);
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });

  try {
    writeFileSync(dest, audio);
  } catch (err) {
    // Translate "no space left on device" into a friendlier SDK error.
    if ((err as NodeJS.ErrnoException).code === 'ENOSPC') {
      throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL);
    }
    throw err;
  }

  return dest;
}
107+
59108
private validateParams(params: Partial<SpeechRequest>): SpeechRequest {
60109
if (!params.text) {
61110
throw new SDKError('text is required', ExitCode.USAGE);

test/sdk/music.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
import { describe, it, expect, afterEach } from 'bun:test';
22
import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server';
33
import { MiniMaxSDK } from '../../src/sdk';
4+
import { MusicSDK } from '../../src/sdk/music';
5+
import { existsSync, unlinkSync, readFileSync } from 'node:fs';
6+
import { join } from 'node:path';
7+
import { tmpdir } from 'node:os';
8+
import type { MusicResponse } from '../../src/types/api';
9+
10+
/**
 * Build a minimal successful `MusicResponse` fixture for the save tests.
 *
 * @param hexAudio - Hex-encoded audio to embed. When omitted or empty, the
 *   hex encoding of the text `hello music audio` is used.
 * @returns A response object with `status_code` 0 and the chosen audio.
 */
function makeMusicResponse(hexAudio?: string): MusicResponse {
  const audio = hexAudio ? hexAudio : Buffer.from('hello music audio').toString('hex');
  const data = { audio, status: 0 };
  const base_resp = { status_code: 0, status_msg: 'ok' };
  return { base_resp, data };
}
419

520
describe('MiniMaxSDK.music', () => {
621
let server: MockServer;
@@ -32,3 +47,40 @@ describe('MiniMaxSDK.music', () => {
3247
expect(result.data.audio_url).toBe('https://example.com/music.mp3');
3348
});
3449
});
50+
51+
// Exercises MusicSDK.save(): decoding, default naming, directory creation,
// and the missing-audio error path.
describe('MusicSDK.save', () => {
  const sdk = new MusicSDK({ apiKey: 'sk-test', region: 'global' });

  // Delete a file a test may have written. Called from `finally` so the file
  // is removed even when an assertion fails part-way through the test.
  const removeIfPresent = (file: string | undefined): void => {
    if (file && existsSync(file)) unlinkSync(file);
  };

  it('decodes hex audio and saves to disk', () => {
    const out = join(tmpdir(), `music-sdk-save-${Date.now()}.mp3`);
    const response = makeMusicResponse();

    try {
      const saved = sdk.save(response, out);
      expect(saved).toBe(out);
      expect(existsSync(out)).toBe(true);
      expect(readFileSync(out).toString()).toBe('hello music audio');
    } finally {
      removeIfPresent(out);
    }
  });

  it('generates default filename with timestamp', () => {
    const response = makeMusicResponse();
    // No outPath: the file is written under the current working directory.
    let saved: string | undefined;

    try {
      saved = sdk.save(response);
      expect(saved).toMatch(/music_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3/);
      expect(existsSync(saved)).toBe(true);
    } finally {
      removeIfPresent(saved);
    }
  });

  it('creates intermediate directories', () => {
    const out = join(tmpdir(), `music-sdk-deep-${Date.now()}`, 'x', 'y', 'song.wav');
    const response = makeMusicResponse();

    try {
      const saved = sdk.save(response, out, 'wav');
      expect(existsSync(saved)).toBe(true);
    } finally {
      // NOTE(review): only the file is removed; the created directories stay
      // in the OS temp dir. Removing them needs rmSync, not imported here.
      removeIfPresent(out);
    }
  });

  it('throws when audio data is missing', () => {
    const response = makeMusicResponse('');
    response.data.audio = undefined;
    // Use the platform temp dir rather than a hard-coded POSIX path.
    const out = join(tmpdir(), `music-sdk-missing-${Date.now()}.mp3`);

    try {
      expect(() => sdk.save(response, out)).toThrow('missing audio data');
      // A rejected response must not produce any output file.
      expect(existsSync(out)).toBe(false);
    } finally {
      removeIfPresent(out);
    }
  });
});

test/sdk/speech.test.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
import { describe, it, expect, afterEach } from 'bun:test';
22
import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server';
33
import { MiniMaxSDK } from '../../src/sdk';
4+
import { SpeechSDK } from '../../src/sdk/speech';
5+
import { existsSync, unlinkSync, readFileSync } from 'node:fs';
6+
import { join } from 'node:path';
7+
import { tmpdir } from 'node:os';
8+
import type { SpeechResponse } from '../../src/types/api';
9+
10+
/**
 * Build a minimal successful `SpeechResponse` fixture for the save tests.
 *
 * @param hexAudio - Hex-encoded audio to embed. When omitted or empty, the
 *   hex encoding of the text `hello speech audio` is used.
 * @returns A response object with `status_code` 0 and the chosen audio.
 */
function makeSpeechResponse(hexAudio?: string): SpeechResponse {
  let audio = hexAudio;
  if (!audio) {
    audio = Buffer.from('hello speech audio').toString('hex');
  }

  return {
    base_resp: { status_code: 0, status_msg: 'ok' },
    data: { audio, status: 0 },
  };
}
419

520
describe('MiniMaxSDK.speech', () => {
621
let server: MockServer;
@@ -54,3 +69,40 @@ describe('MiniMaxSDK.speech', () => {
5469
expect(voices[0].voice_id).toBe('voice-1');
5570
});
5671
});
72+
73+
// Exercises SpeechSDK.save(): decoding, default naming, directory creation,
// and the missing-audio error path.
describe('SpeechSDK.save', () => {
  const sdk = new SpeechSDK({ apiKey: 'sk-test', region: 'global' });

  // Delete a file a test may have written. Called from `finally` so the file
  // is removed even when an assertion fails part-way through the test.
  const removeIfPresent = (file: string | undefined): void => {
    if (file && existsSync(file)) unlinkSync(file);
  };

  it('decodes hex audio and saves to disk', () => {
    const out = join(tmpdir(), `speech-sdk-save-${Date.now()}.mp3`);
    const response = makeSpeechResponse();

    try {
      const saved = sdk.save(response, out);
      expect(saved).toBe(out);
      expect(existsSync(out)).toBe(true);
      expect(readFileSync(out).toString()).toBe('hello speech audio');
    } finally {
      removeIfPresent(out);
    }
  });

  it('generates default filename with timestamp', () => {
    const response = makeSpeechResponse();
    // No outPath: the file is written under the current working directory.
    let saved: string | undefined;

    try {
      saved = sdk.save(response);
      expect(saved).toMatch(/speech_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3/);
      expect(existsSync(saved)).toBe(true);
    } finally {
      removeIfPresent(saved);
    }
  });

  it('creates intermediate directories', () => {
    const out = join(tmpdir(), `speech-sdk-deep-${Date.now()}`, 'a', 'b', 'out.wav');
    const response = makeSpeechResponse();

    try {
      const saved = sdk.save(response, out, 'wav');
      expect(existsSync(saved)).toBe(true);
    } finally {
      // NOTE(review): only the file is removed; the created directories stay
      // in the OS temp dir. Removing them needs rmSync, not imported here.
      removeIfPresent(out);
    }
  });

  it('throws when audio data is missing', () => {
    const response = makeSpeechResponse('');
    response.data.audio = undefined;
    // Use the platform temp dir rather than a hard-coded POSIX path.
    const out = join(tmpdir(), `speech-sdk-missing-${Date.now()}.mp3`);

    try {
      expect(() => sdk.save(response, out)).toThrow('missing audio data');
      // A rejected response must not produce any output file.
      expect(existsSync(out)).toBe(false);
    } finally {
      removeIfPresent(out);
    }
  });
});

0 commit comments

Comments
 (0)