Veo-4-API/veo4_api.py at main · Anil-matcha/Veo-4-API · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import os
import requests
import time
from dotenv import load_dotenv

# Load environment variables from .env file.
# This runs at import time, before any Veo4API instance is created, so that
# Veo4API.__init__ can read MUAPI_API_KEY through os.getenv().
load_dotenv()

class Veo4API:
    """
    Thin client for the Veo 4 video-generation endpoints served by MuAPI.

    Each generation method submits a task and returns the decoded JSON
    response. Pass the ``request_id`` from that response to
    :meth:`get_result` (single poll) or :meth:`wait_for_completion`
    (blocking poll loop) to retrieve the finished video.
    """

    # Seconds allowed for any single HTTP call. ``requests`` has no default
    # timeout, so without this a stalled server would block the caller forever.
    DEFAULT_REQUEST_TIMEOUT = 60

    # Class-level fallback so that instances created without running
    # __init__ (e.g. by subclasses overriding it) still have a timeout.
    request_timeout = DEFAULT_REQUEST_TIMEOUT

    def __init__(self, api_key=None, request_timeout=DEFAULT_REQUEST_TIMEOUT):
        """
        Initialize the Veo 4 API client.

        :param api_key: Your MuAPI.ai API key. Defaults to MUAPI_API_KEY environment variable.
        :param request_timeout: Timeout in seconds applied to every HTTP call
                                (passed to ``requests`` as ``timeout=``).
        :raises ValueError: If no API key is passed and MUAPI_API_KEY is unset/empty.
        """
        self.api_key = api_key or os.getenv("MUAPI_API_KEY")
        if not self.api_key:
            raise ValueError("API Key is required. Set MUAPI_API_KEY in .env or pass it to the constructor.")

        self.base_url = "https://api.muapi.ai/api/v1"
        self.request_timeout = request_timeout
        self.headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json"
        }

    @staticmethod
    def _require_request_id(request_id):
        """Return *request_id* unchanged, or raise ValueError if it is None/blank."""
        if request_id is None or not str(request_id).strip():
            raise ValueError("request_id is required and must not be empty.")
        return request_id

    def text_to_video(self, prompt, aspect_ratio="16:9", duration=8, quality="4k",
                      with_audio=False, camera_control=None):
        """
        Submits a Veo 4 Text-to-Video (T2V) generation task.

        Veo 4 by Google DeepMind generates native 4K video using an upgraded
        Transformer architecture (3x parameters vs Veo 3). Supports integrated
        audio generation and advanced camera controls.

        :param prompt: The text prompt describing the video.
        :param aspect_ratio: Video aspect ratio (e.g., '16:9', '9:16', '1:1').
        :param duration: Video duration in seconds (8–30).
        :param quality: Output quality ('1080p' or '4k').
        :param with_audio: Whether to jointly generate audio alongside the video.
                           Selects the ``veo-4-t2v-audio`` endpoint when true.
        :param camera_control: Optional camera movement hint (e.g., 'pan left',
                               'zoom in', 'orbit', 'tracking shot'). Omitted
                               from the payload when empty.
        :return: JSON response with request_id.
        """
        route = "veo-4-t2v-audio" if with_audio else "veo-4-t2v"
        payload = {
            "prompt": prompt,
            "aspect_ratio": aspect_ratio,
            "duration": duration,
            "quality": quality,
        }
        if camera_control:
            payload["camera_control"] = camera_control
        return self._post_request(f"{self.base_url}/{route}", payload)

    def image_to_video(self, prompt, images_list, aspect_ratio="16:9", duration=8,
                       quality="4k", with_audio=False, camera_control=None):
        """
        Submits a Veo 4 Image-to-Video (I2V) generation task.

        Animate one or more static images into a native 4K video. Reference images
        in the prompt using @image1, @image2, etc.

        :param prompt: Text prompt to guide the animation. Use @image1, @image2, etc.
        :param images_list: A list of image URLs to animate.
        :param aspect_ratio: Video aspect ratio.
        :param duration: Video duration in seconds (8–30).
        :param quality: Output quality ('1080p' or '4k').
        :param with_audio: Whether to jointly generate audio alongside the video.
                           Selects the ``veo-4-i2v-audio`` endpoint when true.
        :param camera_control: Optional camera movement hint. Omitted from the
                               payload when empty.
        :return: JSON response with request_id.
        """
        route = "veo-4-i2v-audio" if with_audio else "veo-4-i2v"
        payload = {
            "prompt": prompt,
            "images_list": images_list,
            "aspect_ratio": aspect_ratio,
            "duration": duration,
            "quality": quality,
        }
        if camera_control:
            payload["camera_control"] = camera_control
        return self._post_request(f"{self.base_url}/{route}", payload)

    def text_to_video_with_audio(self, prompt, aspect_ratio="16:9", duration=8,
                                 quality="4k", camera_control=None):
        """
        Generate a Veo 4 video with integrated audio from a text prompt.

        Convenience wrapper around :meth:`text_to_video` with ``with_audio=True``.

        Veo 4 generates synchronized dialogue, ambient sound, and music jointly
        in one pass. Include audio cues in the prompt for best results
        (e.g. 'waves crashing', 'crowd cheering', 'piano melody').

        :param prompt: Text prompt. Include audio cues for richer sound.
        :param aspect_ratio: Video aspect ratio.
        :param duration: Video duration in seconds (8–30).
        :param quality: Output quality ('1080p' or '4k').
        :param camera_control: Optional camera movement hint.
        :return: JSON response with request_id.
        """
        return self.text_to_video(
            prompt, aspect_ratio, duration, quality,
            with_audio=True, camera_control=camera_control
        )

    def image_to_video_with_audio(self, prompt, images_list, aspect_ratio="16:9",
                                  duration=8, quality="4k", camera_control=None):
        """
        Animate images into a Veo 4 video with integrated audio.

        Convenience wrapper around :meth:`image_to_video` with ``with_audio=True``.

        :param prompt: Text prompt. Reference images with @image1, @image2, etc.
                       Include audio cues for richer output.
        :param images_list: List of image URLs to animate.
        :param aspect_ratio: Video aspect ratio.
        :param duration: Video duration in seconds (8–30).
        :param quality: Output quality ('1080p' or '4k').
        :param camera_control: Optional camera movement hint.
        :return: JSON response with request_id.
        """
        return self.image_to_video(
            prompt, images_list, aspect_ratio, duration, quality,
            with_audio=True, camera_control=camera_control
        )

    def character_video(self, prompt, character_images, aspect_ratio="16:9",
                        duration=8, quality="4k", with_audio=False):
        """
        Generate a video with consistent character identity using Veo 4's
        character anchoring technology.

        Veo 4 keeps faces, clothing, and distinguishing features consistent
        across all frames, even through complex movements and camera changes.

        :param prompt: Scene description. Reference the character with @image1.
                       If the prompt does not contain '@image1', it is stripped
                       and prefixed with '@image1 ' before being sent.
        :param character_images: List of 1–3 reference image URLs of the character.
        :param aspect_ratio: Video aspect ratio.
        :param duration: Video duration in seconds.
        :param quality: Output quality ('1080p' or '4k').
        :param with_audio: Whether to jointly generate audio. Unlike the T2V/I2V
                           methods, this is sent as a payload field rather than
                           selecting a different endpoint.
        :return: JSON response with request_id.

        Example::

            result = api.character_video(
                prompt="@image1 walks through a futuristic city, confident stride",
                character_images=["https://example.com/person.jpg"],
                aspect_ratio="16:9",
                duration=8,
                quality="4k",
            )
            video = api.wait_for_completion(result["request_id"])
            print(video["outputs"][0])
        """
        endpoint = f"{self.base_url}/veo-4-character"
        # Guarantee the prompt anchors on the first reference image.
        if "@image1" not in prompt:
            prompt = f"@image1 {prompt.strip()}"
        payload = {
            "prompt": prompt,
            "images_list": character_images,
            "aspect_ratio": aspect_ratio,
            "duration": duration,
            "quality": quality,
            "with_audio": with_audio,
        }
        return self._post_request(endpoint, payload)

    def extend_video(self, request_id, prompt="", duration=8, quality="4k"):
        """
        Extends a previously generated Veo 4 video.

        :param request_id: The request_id of the video segment to extend.
        :param prompt: Optional text prompt to guide the continuation.
        :param duration: Seconds to extend by (8–30).
        :param quality: Output quality ('1080p' or '4k').
        :return: JSON response with request_id.
        :raises ValueError: If request_id is None or blank.
        """
        endpoint = f"{self.base_url}/veo-4-extend"
        payload = {
            "request_id": self._require_request_id(request_id),
            "prompt": prompt,
            "duration": duration,
            "quality": quality,
        }
        return self._post_request(endpoint, payload)

    def video_edit(self, prompt, video_urls, images_list=None, aspect_ratio="16:9",
                   quality="4k"):
        """
        Edit an existing video using natural language with Veo 4.

        :param prompt: Describe the desired edits.
        :param video_urls: List of video URLs to edit.
        :param images_list: Optional list of reference image URLs. Sent as an
                            empty list when omitted.
        :param aspect_ratio: Output video aspect ratio.
        :param quality: Output quality ('1080p' or '4k').
        :return: JSON response with request_id.
        """
        endpoint = f"{self.base_url}/veo-4-video-edit"
        payload = {
            "prompt": prompt,
            "video_urls": video_urls,
            "images_list": images_list or [],
            "aspect_ratio": aspect_ratio,
            "quality": quality,
        }
        return self._post_request(endpoint, payload)

    def _post_request(self, endpoint, payload):
        """
        POST *payload* as JSON to *endpoint* and return the decoded JSON body.

        :param endpoint: Full URL to post to.
        :param payload: JSON-serializable request body.
        :return: Decoded JSON response.
        :raises requests.HTTPError: On a 4xx/5xx response (via raise_for_status).
        """
        response = requests.post(
            endpoint, json=payload, headers=self.headers,
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return response.json()

    def upload_file(self, file_path):
        """
        Uploads a local file (image or video) to MuAPI for use in generation tasks.

        :param file_path: Path to the local file to upload.
        :return: JSON response containing the URL of the uploaded file.
        :raises requests.HTTPError: On a 4xx/5xx response (via raise_for_status).
        """
        endpoint = f"{self.base_url}/upload_file"
        # Only the API key is sent here: requests must generate the multipart
        # Content-Type (with boundary) itself, so self.headers cannot be reused.
        headers = {"x-api-key": self.api_key}
        with open(file_path, "rb") as file_data:
            files = {"file": file_data}
            response = requests.post(
                endpoint, headers=headers, files=files,
                timeout=self.request_timeout,
            )
        response.raise_for_status()
        return response.json()

    def get_result(self, request_id):
        """
        Polls for the result of a Veo 4 generation task.

        :param request_id: The request_id returned from a generation call.
        :return: JSON response with status and outputs.
        :raises ValueError: If request_id is None or blank.
        :raises requests.HTTPError: On a 4xx/5xx response (via raise_for_status).
        """
        # Fail fast instead of requesting ".../predictions/None/result".
        self._require_request_id(request_id)
        endpoint = f"{self.base_url}/predictions/{request_id}/result"
        response = requests.get(
            endpoint, headers=self.headers, timeout=self.request_timeout,
        )
        response.raise_for_status()
        return response.json()

    def wait_for_completion(self, request_id, poll_interval=5, timeout=600):
        """
        Blocks until a Veo 4 generation task completes and returns the result.

        :param request_id: The request_id returned from a generation call.
        :param poll_interval: Seconds between status polls (default 5).
        :param timeout: Maximum seconds to wait before raising TimeoutError (default 600).
        :return: Completed result JSON with 'outputs' list.
        :raises ValueError: If request_id is None/blank or poll_interval is negative.
        :raises RuntimeError: If the task reports status 'failed'.
        :raises TimeoutError: If the task has not completed within *timeout* seconds.
        """
        self._require_request_id(request_id)
        if poll_interval < 0:
            raise ValueError("poll_interval must be non-negative.")

        # A monotonic clock keeps the deadline immune to system clock changes.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            result = self.get_result(request_id)
            status = result.get("status")

            if status == "completed":
                return result
            if status == "failed":
                raise RuntimeError(f"Video generation failed: {result.get('error')}")

            # Never sleep past the deadline, so the overall timeout is honoured.
            delay = max(0, min(poll_interval, deadline - time.monotonic()))
            print(f"Status: {status}. Waiting {delay:g} seconds...")
            time.sleep(delay)

        raise TimeoutError("Timed out waiting for Veo 4 video generation to complete.")


if __name__ == "__main__":
    # Demo: submit one text-to-video task, block until it finishes, and print
    # the first output URL. Any failure is reported as a single "Error: ..." line.
    try:
        api = Veo4API()
        prompt = "A cinematic tracking shot through a lush rainforest, sunlight filtering through the canopy, birds calling"

        print(f"Submitting T2V task with prompt: {prompt}")
        submission = api.text_to_video(prompt=prompt, duration=8, quality="4k")
        request_id = submission.get("request_id")
        # Without a request_id there is nothing to poll; stop with a clear
        # message instead of polling ".../predictions/None/result".
        if not request_id:
            raise RuntimeError(f"Submission response did not include a request_id: {submission}")
        print(f"Task submitted. Request ID: {request_id}")

        print("Waiting for completion...")
        result = api.wait_for_completion(request_id)
        # "outputs" may be missing, None, or an empty list; fall back to
        # [None] in all three cases rather than raising IndexError on [0].
        outputs = result.get("outputs") or [None]
        print(f"Generation completed! Video URL: {outputs[0]}")

    except Exception as e:
        print(f"Error: {e}")