Skip to content

Commit 348391e

Browse files
committed
feat: introduce image generation configuration, FileSearchCallContent, and support for including input in interaction retrieval.
1 parent 08c36ad commit 348391e

8 files changed

Lines changed: 159 additions & 10 deletions

File tree

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
<groupId>io.github.glaforge</groupId>
2323
<artifactId>gemini-interactions-api-sdk</artifactId>
24-
<version>0.6.1-SNAPSHOT</version>
24+
<version>0.7.0-SNAPSHOT</version>
2525
<name>Gemini Interactions API SDK</name>
2626

2727
<properties>

src/main/java/io/github/glaforge/gemini/interactions/GeminiInteractionsClient.java

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,8 +164,24 @@ public Stream<Events> stream(InteractionParams.Request request) {
164164
* @see <a href="https://ai.google.dev/api/interactions-api#getInteractionById">Get Interaction API Reference</a>
165165
*/
166166
public Interaction get(String id) {
167+
return get(id, false);
168+
}
169+
170+
/**
171+
* Retrieves an interaction by ID, optionally including the original input.
172+
*
173+
* @param id The interaction ID.
174+
* @param includeInput Whether to include the input in the response.
175+
* @return The Interaction.
176+
* @throws GeminiInteractionsException If the API request fails or an error occurs.
177+
* @see <a href="https://ai.google.dev/api/interactions-api#getInteractionById">Get Interaction API Reference</a>
178+
*/
179+
public Interaction get(String id, boolean includeInput) {
167180
try {
168181
String url = String.format("%s/%s/interactions/%s", baseUrl, version, id);
182+
if (includeInput) {
183+
url += "?include_input=true";
184+
}
169185

170186
HttpRequest httpRequest = HttpRequest.newBuilder()
171187
.uri(URI.create(url))

src/main/java/io/github/glaforge/gemini/interactions/model/Config.java

Lines changed: 40 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ private Config() {}
4242
* @param thinkingSummaries Configuration for thinking summaries.
4343
* @param maxOutputTokens The maximum number of tokens to include in a candidate.
4444
* @param speechConfig Configuration for speech generation.
45+
* @param imageConfig Configuration for image generation.
4546
*/
4647
@JsonIgnoreProperties(ignoreUnknown = true)
4748
public record GenerationConfig(
@@ -53,7 +54,8 @@ public record GenerationConfig(
5354
@JsonProperty("thinking_level") ThinkingLevel thinkingLevel,
5455
@JsonProperty("thinking_summaries") ThinkingSummaries thinkingSummaries,
5556
@JsonProperty("max_output_tokens") Integer maxOutputTokens,
56-
@JsonProperty("speech_config") List<SpeechConfig> speechConfig
57+
@JsonProperty("speech_config") List<SpeechConfig> speechConfig,
58+
@JsonProperty("image_config") ImageConfig imageConfig
5759
) {}
5860

5961
/**
@@ -90,6 +92,43 @@ public record SpeechConfig(
9092
String speaker
9193
) {}
9294

95+
/**
96+
* Configuration for image generation.
97+
*
98+
* @param aspectRatio The aspect ratio of the generated image.
99+
* @param imageSize The size of the generated image.
100+
*/
101+
@JsonIgnoreProperties(ignoreUnknown = true)
102+
public record ImageConfig(
103+
@JsonProperty("aspect_ratio") AspectRatio aspectRatio,
104+
@JsonProperty("image_size") ImageSize imageSize
105+
) {}
106+
107+
/**
108+
* Aspect ratio for generated images.
109+
*/
110+
public enum AspectRatio {
111+
@JsonProperty("1:1") RATIO_1_1,
112+
@JsonProperty("2:3") RATIO_2_3,
113+
@JsonProperty("3:2") RATIO_3_2,
114+
@JsonProperty("3:4") RATIO_3_4,
115+
@JsonProperty("4:3") RATIO_4_3,
116+
@JsonProperty("4:5") RATIO_4_5,
117+
@JsonProperty("5:4") RATIO_5_4,
118+
@JsonProperty("9:16") RATIO_9_16,
119+
@JsonProperty("16:9") RATIO_16_9,
120+
@JsonProperty("21:9") RATIO_21_9
121+
}
122+
123+
/**
124+
* Size for generated images.
125+
*/
126+
public enum ImageSize {
127+
@JsonProperty("1K") SIZE_1K,
128+
@JsonProperty("2K") SIZE_2K,
129+
@JsonProperty("4K") SIZE_4K
130+
}
131+
93132
/**
94133
* Sealed interface for agent configurations.
95134
*/

src/main/java/io/github/glaforge/gemini/interactions/model/Content.java

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
@JsonSubTypes.Type(value = Content.GoogleSearchResultContent.class, name = "google_search_result"),
5151
@JsonSubTypes.Type(value = Content.McpServerToolCallContent.class, name = "mcp_server_tool_call"),
5252
@JsonSubTypes.Type(value = Content.McpServerToolResultContent.class, name = "mcp_server_tool_result"),
53+
@JsonSubTypes.Type(value = Content.FileSearchCallContent.class, name = "file_search_call"),
5354
@JsonSubTypes.Type(value = Content.FileSearchResultContent.class, name = "file_search_result")
5455
})
5556
public sealed interface Content permits
@@ -69,6 +70,7 @@ public sealed interface Content permits
6970
Content.GoogleSearchResultContent,
7071
Content.McpServerToolCallContent,
7172
Content.McpServerToolResultContent,
73+
Content.FileSearchCallContent,
7274
Content.FileSearchResultContent {
7375

7476
/**
@@ -274,9 +276,19 @@ record FunctionResultContent(
274276
@JsonProperty("call_id") String callId,
275277
String name,
276278
@JsonProperty("is_error") Boolean isError,
277-
Object result // oneOf object, object(items), string
279+
Object result // string, object, or ToolResult with List<Content> items
278280
) implements Content {}
279281

282+
/**
283+
* Structure for multimodal tool results.
284+
*
285+
* @param items List of content items (TextContent, ImageContent, etc.)
286+
*/
287+
@JsonIgnoreProperties(ignoreUnknown = true)
288+
record ToolResult(
289+
List<Content> items
290+
) {}
291+
280292
// --- Code Execution ---
281293

282294
/**
@@ -472,11 +484,23 @@ record McpServerToolResultContent(
472484
@JsonProperty("call_id") String callId,
473485
String name,
474486
@JsonProperty("server_name") String serverName,
475-
Object result // oneOf object, object(items), string
487+
Object result // string, object, or ToolResult with List<Content> items
476488
) implements Content {}
477489

478490
// --- File Search ---
479491

492+
/**
493+
* Content representing a file search call.
494+
*
495+
* @param type The type of content (must be "file_search_call").
496+
* @param id The unique identifier for the file search call.
497+
*/
498+
@JsonIgnoreProperties(ignoreUnknown = true)
499+
record FileSearchCallContent(
500+
String type,
501+
String id
502+
) implements Content {}
503+
480504
/**
481505
* Content representing the result of a file search.
482506
*

src/main/java/io/github/glaforge/gemini/interactions/model/Interaction.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ public record Turn(
7474
* @param outputTokensByModality Output tokens broken down by modality.
7575
* @param totalToolUseTokens Total tool use tokens.
7676
* @param toolUseTokensByModality Tool use tokens broken down by modality.
77-
* @param totalReasoningTokens Total reasoning tokens.
77+
* @param totalThoughtTokens Total thought (reasoning) tokens.
7878
* @param totalTokens Total tokens.
7979
*/
8080
@JsonIgnoreProperties(ignoreUnknown = true)
@@ -87,7 +87,7 @@ public record Usage(
8787
@JsonProperty("output_tokens_by_modality") List<ModalityTokens> outputTokensByModality,
8888
@JsonProperty("total_tool_use_tokens") Integer totalToolUseTokens,
8989
@JsonProperty("tool_use_tokens_by_modality") List<ModalityTokens> toolUseTokensByModality,
90-
@JsonProperty("total_reasoning_tokens") Integer totalReasoningTokens,
90+
@JsonProperty("total_thought_tokens") Integer totalThoughtTokens,
9191
@JsonProperty("total_tokens") Integer totalTokens
9292
) {}
9393

src/main/java/io/github/glaforge/gemini/interactions/model/InteractionParams.java

Lines changed: 67 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,7 @@ public Builder() {}
9696
private Object responseFormat;
9797
private String responseMimeType;
9898
private String previousInteractionId;
99+
private Config.ImageConfig imageConfig;
99100

100101
/**
101102
* Sets the model.
@@ -259,13 +260,40 @@ public Builder responseFormat(Schema schema) {
259260
*/
260261
public Builder previousInteractionId(String previousInteractionId) { this.previousInteractionId = previousInteractionId; return this; }
261262

263+
/**
264+
* Sets the image config.
265+
*
266+
* @param imageConfig The image configuration.
267+
* @return This builder.
268+
*/
269+
public Builder imageConfig(Config.ImageConfig imageConfig) { this.imageConfig = imageConfig; return this; }
270+
262271
/**
263272
* Builds the CreateModelInteractionParams.
264273
*
265274
* @return The CreateModelInteractionParams parameters.
266275
*/
267276
public ModelInteractionParams build() {
268-
return new ModelInteractionParams(model, input, generationConfig, tools, stream, store, background, systemInstruction, responseModalities, responseFormat, responseMimeType, previousInteractionId);
277+
Config.GenerationConfig finalConfig = generationConfig;
278+
if (imageConfig != null) {
279+
if (finalConfig == null) {
280+
finalConfig = new Config.GenerationConfig(null, null, null, null, null, null, null, null, null, imageConfig);
281+
} else {
282+
finalConfig = new Config.GenerationConfig(
283+
finalConfig.temperature(),
284+
finalConfig.topP(),
285+
finalConfig.seed(),
286+
finalConfig.stopSequences(),
287+
finalConfig.toolChoice(),
288+
finalConfig.thinkingLevel(),
289+
finalConfig.thinkingSummaries(),
290+
finalConfig.maxOutputTokens(),
291+
finalConfig.speechConfig(),
292+
imageConfig
293+
);
294+
}
295+
}
296+
return new ModelInteractionParams(model, input, finalConfig, tools, stream, store, background, systemInstruction, responseModalities, responseFormat, responseMimeType, previousInteractionId);
269297
}
270298
}
271299
}
@@ -316,6 +344,7 @@ public Builder() {}
316344
private String agent;
317345
private Object input;
318346
private Config.AgentConfig agentConfig;
347+
private Config.GenerationConfig generationConfig;
319348
private List<Tool> tools;
320349
private Boolean stream;
321350
private Boolean store;
@@ -325,6 +354,7 @@ public Builder() {}
325354
private Object responseFormat;
326355
private String responseMimeType;
327356
private String previousInteractionId;
357+
private Config.ImageConfig imageConfig;
328358

329359
/**
330360
* Sets the agent.
@@ -381,6 +411,14 @@ public Builder() {}
381411
* @return This builder.
382412
*/
383413
public Builder agentConfig(Config.AgentConfig agentConfig) { this.agentConfig = agentConfig; return this; }
414+
415+
/**
416+
* Sets the generation config.
417+
*
418+
* @param generationConfig The generation configuration.
419+
* @return This builder.
420+
*/
421+
public Builder generationConfig(Config.GenerationConfig generationConfig) { this.generationConfig = generationConfig; return this; }
384422
/**
385423
* Sets the tools.
386424
*
@@ -494,13 +532,40 @@ public Builder responseFormat(String responseFormat) {
494532
*/
495533
public Builder previousInteractionId(String previousInteractionId) { this.previousInteractionId = previousInteractionId; return this; }
496534

535+
/**
536+
* Sets the image config.
537+
*
538+
* @param imageConfig The image configuration.
539+
* @return This builder.
540+
*/
541+
public Builder imageConfig(Config.ImageConfig imageConfig) { this.imageConfig = imageConfig; return this; }
542+
497543
/**
498544
* Builds the CreateAgentInteractionParams.
499545
*
500546
* @return The CreateAgentInteractionParams parameters.
501547
*/
502548
public AgentInteractionParams build() {
503-
return new AgentInteractionParams(agent, input, agentConfig, null, tools, stream, store, background, systemInstruction, responseModalities, responseFormat, responseMimeType, previousInteractionId);
549+
Config.GenerationConfig finalConfig = generationConfig;
550+
if (imageConfig != null) {
551+
if (finalConfig == null) {
552+
finalConfig = new Config.GenerationConfig(null, null, null, null, null, null, null, null, null, imageConfig);
553+
} else {
554+
finalConfig = new Config.GenerationConfig(
555+
finalConfig.temperature(),
556+
finalConfig.topP(),
557+
finalConfig.seed(),
558+
finalConfig.stopSequences(),
559+
finalConfig.toolChoice(),
560+
finalConfig.thinkingLevel(),
561+
finalConfig.thinkingSummaries(),
562+
finalConfig.maxOutputTokens(),
563+
finalConfig.speechConfig(),
564+
imageConfig
565+
);
566+
}
567+
}
568+
return new AgentInteractionParams(agent, input, agentConfig, finalConfig, tools, stream, store, background, systemInstruction, responseModalities, responseFormat, responseMimeType, previousInteractionId);
504569
}
505570
}
506571
}

src/test/java/io/github/glaforge/gemini/interactions/ResearchFrontend.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,8 @@ public static void main(String[] args) {
113113

114114
var tabs = Jt.tabs(List.of("Full Report", "Summary", "Infographic")).use();
115115

116-
var reportPlaceholder = Jt.empty().key("fullReport").use(tabs.tab(0));
116+
var reportContainer = Jt.container().key("reportContainer").use(tabs.tab(0));
117+
var reportPlaceholder = Jt.empty().key("fullReport").use(reportContainer);
117118
var summaryPlaceholder = Jt.empty().key("summary").use(tabs.tab(1));
118119
var infographicPlaceholder = Jt.empty().key("infographic").use(tabs.tab(2));
119120

@@ -164,6 +165,9 @@ public static void main(String[] args) {
164165
}
165166
});
166167

168+
var rawReportExpander = Jt.expander("Raw Markdown Report").use(reportContainer);
169+
Jt.text(transformCitations(reportBuilder.toString())).use(rawReportExpander);
170+
167171
// compute/fetch summary
168172
ModelInteractionParams summaryParams = ModelInteractionParams.builder()
169173
.model("gemini-3-pro-preview")

src/test/java/io/github/glaforge/gemini/interactions/UsageDemoTest.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,8 @@ public void testSdkCompilationAndUsage() {
102102
ThinkingLevel.LOW, // thinking_level
103103
ThinkingSummaries.AUTO, // thinking_summaries
104104
1000,
105-
null // speech
105+
null, // speech
106+
null // image
106107
);
107108
assertNotNull(config);
108109

0 commit comments

Comments
 (0)