Merge pull request #147 from NianJiuZst/feat/sdk-audio-save

RyanLee-Dev · web-flow · commit 93e57061cf27 · 2026-05-15T09:45:08.000+08:00
feat: add audio file save support to SpeechSDK and MusicSDK
diff --git a/src/sdk/music/index.ts b/src/sdk/music/index.ts
@@ -1,3 +1,5 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import { resolve, dirname } from 'node:path';
 import { Client } from "../client";
 import { musicEndpoint } from "../../client/endpoints";
 import { MusicRequest, MusicResponse } from "../../types/api";
@@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes";
 import { toMerged } from "es-toolkit/object";
 import { musicGenerateModel } from "../../commands/music/models";
 
+/**
+ * Decode a hex-encoded audio payload into raw bytes.
+ *
+ * `Buffer.from(..., 'hex')` does not throw on malformed input, so the string
+ * is checked explicitly before decoding.
+ *
+ * @param hex — Hex string taken from the API response.
+ * @returns The decoded bytes.
+ * @throws SDKError (`ExitCode.GENERAL`) when the string contains a non-hex
+ *   character or has an odd number of characters.
+ */
+function hexToBuffer(hex: string): Buffer {
+  const onlyHexDigits = /^[0-9a-fA-F]*$/.test(hex);
+  if (!onlyHexDigits) {
+    throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL);
+  }
+
+  // Two hex digits encode one byte; a leftover digit means the data was cut short.
+  const hasDanglingDigit = hex.length % 2 === 1;
+  if (hasDanglingDigit) {
+    throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL);
+  }
+
+  return Buffer.from(hex, 'hex');
+}
+
+/**
+ * Build a timestamped default file name of the form
+ * `<prefix>_YYYY-MM-DD-HH-mm-ss.<ext>`.
+ *
+ * The timestamp comes from `Date#toISOString()`, so it is expressed in UTC
+ * and has one-second resolution.
+ *
+ * @param prefix — Leading part of the name, e.g. `"music"`.
+ * @param ext    — File extension without the leading dot.
+ * @returns The generated file name.
+ */
+function defaultFilename(prefix: string, ext: string): string {
+  // "2026-05-15T09:45:08.000Z" -> "2026-05-15T09:45:08" -> "2026-05-15-09-45-08"
+  const isoToSeconds = new Date().toISOString().substring(0, 19);
+  const stamp = isoToSeconds.replace(/[T:]/g, '-');
+  return prefix + '_' + stamp + '.' + ext;
+}
+
 export interface MusicGenerateRequest extends MusicRequest {
   /** Vocal style, e.g. "warm male baritone", "bright female soprano", "duet with harmonies" */
   vocals?: string;
@@ -81,6 +98,38 @@ export class MusicSDK extends Client {
     });
   }
 
+  /**
+   * Save generated music audio to a file. Decodes the hex-encoded audio
+   * from the API response and writes it to disk, creating intermediate
+   * directories as needed.
+   *
+   * The payload is validated and decoded before the file system is touched,
+   * so a missing or malformed payload never leaves freshly created
+   * directories behind.
+   *
+   * @param response — The response from `generate()`.
+   * @param outPath  — Target file path; relative paths are resolved against
+   *   the current working directory. When omitted or empty, defaults to
+   *   `music_<timestamp>.<ext>`.
+   * @param ext      — Extension used for the default file name only
+   *   (default: `"mp3"`); it is ignored when `outPath` is given.
+   * @returns The absolute path of the saved file.
+   * @throws SDKError when the response has no audio, the audio is not valid
+   *   hex, or the disk is full (`ENOSPC`). Any other file system error is
+   *   rethrown unchanged.
+   */
+  save(response: MusicResponse, outPath?: string, ext = 'mp3'): string {
+    const audioHex = response.data.audio;
+    if (!audioHex) {
+      throw new SDKError('API response missing audio data.', ExitCode.GENERAL);
+    }
+
+    // Decode up front: a bad payload must fail before any directory is created.
+    const audio = hexToBuffer(audioHex);
+
+    // `||` rather than `??` on purpose: an empty string also selects the default name.
+    const dest = resolve(outPath || defaultFilename('music', ext));
+    const dir = dirname(dest);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+
+    try {
+      writeFileSync(dest, audio);
+    } catch (err) {
+      // Translate only the "disk full" case into an SDK error; everything else
+      // (permissions, invalid path, ...) propagates as the original error.
+      if ((err as NodeJS.ErrnoException).code === 'ENOSPC') {
+        throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL);
+      }
+      throw err;
+    }
+
+    return dest;
+  }
+
   private buildPrompt(request: ModelPartial<MusicGenerateRequest>) {
     const structuredParts: string[] = [];
     if (request.vocals)      structuredParts.push(`Vocals: ${request.vocals as string}`);
diff --git a/src/sdk/speech/index.ts b/src/sdk/speech/index.ts
@@ -1,3 +1,5 @@
+import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
+import { resolve, dirname } from 'node:path';
 import { Client } from "../client";
 import { speechEndpoint, voicesEndpoint } from "../../client/endpoints";
 import { SpeechRequest, SpeechResponse, VoiceListResponse } from "../../types/api";
@@ -7,6 +9,21 @@ import { ExitCode } from "../../errors/codes";
 import { toMerged } from "es-toolkit/object";
 import { ModelPartial } from "../types";
 
+/**
+ * Decode a hex-encoded audio payload into raw bytes.
+ *
+ * `Buffer.from(..., 'hex')` does not throw on malformed input, so the string
+ * is checked explicitly before decoding.
+ *
+ * @param hex — Hex string taken from the API response.
+ * @returns The decoded bytes.
+ * @throws SDKError (`ExitCode.GENERAL`) when the string contains a non-hex
+ *   character or has an odd number of characters.
+ */
+function hexToBuffer(hex: string): Buffer {
+  const onlyHexDigits = /^[0-9a-fA-F]*$/.test(hex);
+  if (!onlyHexDigits) {
+    throw new SDKError('API returned invalid audio data (not valid hex).', ExitCode.GENERAL);
+  }
+
+  // Two hex digits encode one byte; a leftover digit means the data was cut short.
+  const hasDanglingDigit = hex.length % 2 === 1;
+  if (hasDanglingDigit) {
+    throw new SDKError('API returned truncated audio data (odd-length hex string).', ExitCode.GENERAL);
+  }
+
+  return Buffer.from(hex, 'hex');
+}
+
+/**
+ * Build a timestamped default file name of the form
+ * `<prefix>_YYYY-MM-DD-HH-mm-ss.<ext>`.
+ *
+ * The timestamp comes from `Date#toISOString()`, so it is expressed in UTC
+ * and has one-second resolution.
+ *
+ * @param prefix — Leading part of the name, e.g. `"speech"`.
+ * @param ext    — File extension without the leading dot.
+ * @returns The generated file name.
+ */
+function defaultFilename(prefix: string, ext: string): string {
+  // "2026-05-15T09:45:08.000Z" -> "2026-05-15T09:45:08" -> "2026-05-15-09-45-08"
+  const isoToSeconds = new Date().toISOString().substring(0, 19);
+  const stamp = isoToSeconds.replace(/[T:]/g, '-');
+  return prefix + '_' + stamp + '.' + ext;
+}
+
 export class SpeechSDK extends Client {
   async synthesize(request: ModelPartial<SpeechRequest> & { stream: true }): Promise<AsyncGenerator<SpeechResponse>>;
   async synthesize(request: ModelPartial<SpeechRequest>): Promise<SpeechResponse>;
@@ -56,6 +73,38 @@ export class SpeechSDK extends Client {
     return voices;
   }
 
+  /**
+   * Save synthesized speech audio to a file. Decodes the hex-encoded audio
+   * from the API response and writes it to disk, creating intermediate
+   * directories as needed.
+   *
+   * The payload is validated and decoded before the file system is touched,
+   * so a missing or malformed payload never leaves freshly created
+   * directories behind.
+   *
+   * @param response — The response from `synthesize()`.
+   * @param outPath  — Target file path; relative paths are resolved against
+   *   the current working directory. When omitted or empty, defaults to
+   *   `speech_<timestamp>.<ext>`.
+   * @param ext      — Extension used for the default file name only
+   *   (default: `"mp3"`); it is ignored when `outPath` is given.
+   * @returns The absolute path of the saved file.
+   * @throws SDKError when the response has no audio, the audio is not valid
+   *   hex, or the disk is full (`ENOSPC`). Any other file system error is
+   *   rethrown unchanged.
+   */
+  save(response: SpeechResponse, outPath?: string, ext = 'mp3'): string {
+    const audioHex = response.data.audio;
+    if (!audioHex) {
+      throw new SDKError('API response missing audio data.', ExitCode.GENERAL);
+    }
+
+    // Decode up front: a bad payload must fail before any directory is created.
+    const audio = hexToBuffer(audioHex);
+
+    // `||` rather than `??` on purpose: an empty string also selects the default name.
+    const dest = resolve(outPath || defaultFilename('speech', ext));
+    const dir = dirname(dest);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+
+    try {
+      writeFileSync(dest, audio);
+    } catch (err) {
+      // Translate only the "disk full" case into an SDK error; everything else
+      // (permissions, invalid path, ...) propagates as the original error.
+      if ((err as NodeJS.ErrnoException).code === 'ENOSPC') {
+        throw new SDKError('Disk full — cannot write audio file.', ExitCode.GENERAL);
+      }
+      throw err;
+    }
+
+    return dest;
+  }
+
   private validateParams(params: Partial<SpeechRequest>): SpeechRequest {
     if (!params.text) {
       throw new SDKError('text is required', ExitCode.USAGE);
diff --git a/test/sdk/music.test.ts b/test/sdk/music.test.ts
@@ -1,6 +1,21 @@
 import { describe, it, expect, afterEach } from 'bun:test';
 import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server';
 import { MiniMaxSDK } from '../../src/sdk';
+import { MusicSDK } from '../../src/sdk/music';
+import { existsSync, unlinkSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import type { MusicResponse } from '../../src/types/api';
+
+/**
+ * Build a successful `MusicResponse` fixture.
+ *
+ * @param hexAudio — Hex-encoded audio to embed. When omitted or empty, the
+ *   hex encoding of the text `hello music audio` is used instead.
+ * @returns A response with `status_code` 0 and the chosen audio payload.
+ */
+function makeMusicResponse(hexAudio?: string): MusicResponse {
+  const fallbackHex = Buffer.from('hello music audio').toString('hex');
+  // Truthiness check on purpose: an empty string also falls back to the default.
+  const audio = hexAudio ? hexAudio : fallbackHex;
+
+  return {
+    base_resp: { status_code: 0, status_msg: 'ok' },
+    data: { audio, status: 0 },
+  };
+}
 
 describe('MiniMaxSDK.music', () => {
   let server: MockServer;
@@ -32,3 +47,40 @@ describe('MiniMaxSDK.music', () => {
     expect(result.data.audio_url).toBe('https://example.com/music.mp3');
   });
 });
+
+// Exercises MusicSDK.save(): hex decoding, default file naming, directory
+// creation, and the missing-audio error path.
+describe('MusicSDK.save', () => {
+  const sdk = new MusicSDK({ apiKey: 'sk-test', region: 'global' });
+
+  // Removes a file a test may have written. Called from `finally` so that a
+  // failing assertion does not leave artifacts on disk.
+  const removeIfPresent = (file: string | undefined): void => {
+    if (file && existsSync(file)) unlinkSync(file);
+  };
+
+  it('decodes hex audio and saves to disk', () => {
+    const out = join(tmpdir(), `music-sdk-save-${Date.now()}.mp3`);
+    try {
+      const saved = sdk.save(makeMusicResponse(), out);
+      expect(saved).toBe(out);
+      expect(existsSync(out)).toBe(true);
+      expect(readFileSync(out).toString()).toBe('hello music audio');
+    } finally {
+      removeIfPresent(out);
+    }
+  });
+
+  it('generates default filename with timestamp', () => {
+    let saved: string | undefined;
+    try {
+      saved = sdk.save(makeMusicResponse());
+      // Anchored so only the final path segment can satisfy the pattern.
+      expect(saved).toMatch(/[\\/]music_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3$/);
+      expect(existsSync(saved)).toBe(true);
+    } finally {
+      removeIfPresent(saved);
+    }
+  });
+
+  it('creates intermediate directories', () => {
+    const out = join(tmpdir(), `music-sdk-deep-${Date.now()}`, 'x', 'y', 'song.wav');
+    try {
+      const saved = sdk.save(makeMusicResponse(), out, 'wav');
+      expect(saved).toBe(out);
+      expect(existsSync(saved)).toBe(true);
+    } finally {
+      // NOTE: only the file is removed here; the (now empty) directories are
+      // left in the OS temp dir.
+      removeIfPresent(out);
+    }
+  });
+
+  it('throws when audio data is missing', () => {
+    const response = makeMusicResponse();
+    response.data.audio = undefined;
+    // Portable temp path instead of a hard-coded POSIX location.
+    const out = join(tmpdir(), `music-sdk-missing-${Date.now()}.mp3`);
+    expect(() => sdk.save(response, out)).toThrow('missing audio data');
+    expect(existsSync(out)).toBe(false);
+  });
+});
diff --git a/test/sdk/speech.test.ts b/test/sdk/speech.test.ts
@@ -1,6 +1,21 @@
 import { describe, it, expect, afterEach } from 'bun:test';
 import { createMockServer, jsonResponse, type MockServer } from '../helpers/mock-server';
 import { MiniMaxSDK } from '../../src/sdk';
+import { SpeechSDK } from '../../src/sdk/speech';
+import { existsSync, unlinkSync, readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import type { SpeechResponse } from '../../src/types/api';
+
+/**
+ * Build a successful `SpeechResponse` fixture.
+ *
+ * @param hexAudio — Hex-encoded audio to embed. When omitted or empty, the
+ *   hex encoding of the text `hello speech audio` is used instead.
+ * @returns A response with `status_code` 0 and the chosen audio payload.
+ */
+function makeSpeechResponse(hexAudio?: string): SpeechResponse {
+  const fallbackHex = Buffer.from('hello speech audio').toString('hex');
+  // Truthiness check on purpose: an empty string also falls back to the default.
+  const audio = hexAudio ? hexAudio : fallbackHex;
+
+  return {
+    base_resp: { status_code: 0, status_msg: 'ok' },
+    data: { audio, status: 0 },
+  };
+}
 
 describe('MiniMaxSDK.speech', () => {
   let server: MockServer;
@@ -54,3 +69,40 @@ describe('MiniMaxSDK.speech', () => {
     expect(voices[0].voice_id).toBe('voice-1');
   });
 });
+
+// Exercises SpeechSDK.save(): hex decoding, default file naming, directory
+// creation, and the missing-audio error path.
+describe('SpeechSDK.save', () => {
+  const sdk = new SpeechSDK({ apiKey: 'sk-test', region: 'global' });
+
+  // Removes a file a test may have written. Called from `finally` so that a
+  // failing assertion does not leave artifacts on disk.
+  const removeIfPresent = (file: string | undefined): void => {
+    if (file && existsSync(file)) unlinkSync(file);
+  };
+
+  it('decodes hex audio and saves to disk', () => {
+    const out = join(tmpdir(), `speech-sdk-save-${Date.now()}.mp3`);
+    try {
+      const saved = sdk.save(makeSpeechResponse(), out);
+      expect(saved).toBe(out);
+      expect(existsSync(out)).toBe(true);
+      expect(readFileSync(out).toString()).toBe('hello speech audio');
+    } finally {
+      removeIfPresent(out);
+    }
+  });
+
+  it('generates default filename with timestamp', () => {
+    let saved: string | undefined;
+    try {
+      saved = sdk.save(makeSpeechResponse());
+      // Anchored so only the final path segment can satisfy the pattern.
+      expect(saved).toMatch(/[\\/]speech_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.mp3$/);
+      expect(existsSync(saved)).toBe(true);
+    } finally {
+      removeIfPresent(saved);
+    }
+  });
+
+  it('creates intermediate directories', () => {
+    const out = join(tmpdir(), `speech-sdk-deep-${Date.now()}`, 'a', 'b', 'out.wav');
+    try {
+      const saved = sdk.save(makeSpeechResponse(), out, 'wav');
+      expect(saved).toBe(out);
+      expect(existsSync(saved)).toBe(true);
+    } finally {
+      // NOTE: only the file is removed here; the (now empty) directories are
+      // left in the OS temp dir.
+      removeIfPresent(out);
+    }
+  });
+
+  it('throws when audio data is missing', () => {
+    const response = makeSpeechResponse();
+    response.data.audio = undefined;
+    // Portable temp path instead of a hard-coded POSIX location.
+    const out = join(tmpdir(), `speech-sdk-missing-${Date.now()}.mp3`);
+    expect(() => sdk.save(response, out)).toThrow('missing audio data');
+    expect(existsSync(out)).toBe(false);
+  });
+});