Skip to content
33 changes: 31 additions & 2 deletions pkg/llm/provider/ollama/ollama.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"

"github.com/papercomputeco/tapes/pkg/llm"
"github.com/papercomputeco/tapes/pkg/llm/provider/openai"
)

// Provider implements the Provider interface for Ollama's API.
Expand All @@ -23,7 +24,15 @@ func (o *Provider) DefaultStreaming() bool {
func (o *Provider) ParseRequest(payload []byte) (*llm.ChatRequest, error) {
var req ollamaRequest
if err := json.Unmarshal(payload, &req); err != nil {
return nil, err
return openai.ParseRequestPayload(payload)
}

// Detect OpenAI-format payloads that unmarshal successfully but lose content.
// When content is a JSON array (e.g. OpenCode sending OpenAI-format requests
// to Ollama), Go's decoder silently zero-values the string field, producing
// messages with a role but no content, images, or tool calls.
if hasLostContent(req.Messages) {
return openai.ParseRequestPayload(payload)
}

messages := make([]llm.Message, 0, len(req.Messages))
Expand Down Expand Up @@ -108,7 +117,14 @@ func (o *Provider) ParseRequest(payload []byte) (*llm.ChatRequest, error) {
func (o *Provider) ParseResponse(payload []byte) (*llm.ChatResponse, error) {
var resp ollamaResponse
if err := json.Unmarshal(payload, &resp); err != nil {
return nil, err
return openai.ParseResponsePayload(payload)
}

// Detect OpenAI-format responses: they use a "choices" array instead of a
// top-level "message" field, so resp.Message will be zero-valued while the
// model field is still populated from the JSON.
if resp.Model != "" && resp.Message.Role == "" && !resp.Done {
return openai.ParseResponsePayload(payload)
}

// Convert message content
Expand Down Expand Up @@ -183,3 +199,16 @@ func (o *Provider) ParseResponse(payload []byte) (*llm.ChatResponse, error) {
// ParseStreamChunk parses a single streaming chunk from Ollama into an
// llm.StreamChunk. Streaming is not yet supported for this provider; the
// panic is a deliberate guard so that enabling streaming without
// implementing this method fails loudly rather than silently misparsing.
func (o *Provider) ParseStreamChunk(_ []byte) (*llm.StreamChunk, error) {
panic("Not yet implemented")
}

// hasLostContent reports whether unmarshaling an OpenAI-format payload into
// Ollama types silently dropped message content. ollamaMessage.Content is a
// plain string, so array-valued content (e.g. [{type: "text", text: "..."}])
// is zero-valued by Go's JSON decoder, leaving a message that carries a role
// but neither content, images, nor tool calls.
func hasLostContent(msgs []ollamaMessage) bool {
	for _, msg := range msgs {
		if msg.Role == "" {
			continue
		}
		// A role with no content of any kind is the fingerprint of a
		// lossy decode.
		if msg.Content == "" && len(msg.Images) == 0 && len(msg.ToolCalls) == 0 {
			return true
		}
	}
	return false
}
89 changes: 89 additions & 0 deletions pkg/llm/provider/ollama/ollama_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,95 @@ var _ = Describe("Ollama Provider", func() {
})
})

Describe("ParseRequest with OpenAI-format content (OpenCode compatibility)", func() {
It("parses array content from OpenCode/Ollama requests", func() {
// This is the exact format OpenCode sends when using Ollama,
// where content is an array of objects instead of a plain string.
// See: https://github.com/papercomputeco/tapes/issues/137
payload := []byte(`{
"model": "qwen3-coder:30b",
"max_tokens": 32000,
"top_p": 1,
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": [
{"type": "text", "text": "I want to plan a unit test"},
{"type": "text", "text": "Additional context here"}
]
}
]
}`)

// The array-valued "content" should trigger the OpenAI-parser
// fallback inside ParseRequest, so both text parts survive as
// separate content blocks rather than being zero-valued away.
req, err := p.ParseRequest(payload)
Expect(err).NotTo(HaveOccurred())
Expect(req.Model).To(Equal("qwen3-coder:30b"))
Expect(req.Messages).To(HaveLen(2))
Expect(req.Messages[0].Role).To(Equal("system"))
Expect(req.Messages[0].GetText()).To(Equal("You are a helpful assistant."))
Expect(req.Messages[1].Role).To(Equal("user"))
Expect(req.Messages[1].Content).To(HaveLen(2))
Expect(req.Messages[1].Content[0].Text).To(Equal("I want to plan a unit test"))
Expect(req.Messages[1].Content[1].Text).To(Equal("Additional context here"))
})

It("handles all-string OpenAI-format messages without false positive", func() {
// When all messages have string content, native Ollama parsing should
// be used (no fallback needed).
payload := []byte(`{
"model": "llama2",
"messages": [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hello!"}
]
}`)

req, err := p.ParseRequest(payload)
Expect(err).NotTo(HaveOccurred())
Expect(req.Messages).To(HaveLen(2))
Expect(req.Messages[1].GetText()).To(Equal("Hello!"))
})
})

Describe("ParseResponse with OpenAI-format (OpenCode compatibility)", func() {
It("parses OpenAI-format response with choices array", func() {
// OpenAI-format responses carry a "choices" array and no top-level
// "message" field; ParseResponse should detect this shape and route
// the payload through the OpenAI fallback parser.
payload := []byte(`{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1677858242,
"model": "qwen3-coder:30b",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Here is the test plan."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150
}
}`)

// Message and usage must come from the first choice and the
// top-level usage object respectively.
resp, err := p.ParseResponse(payload)
Expect(err).NotTo(HaveOccurred())
Expect(resp.Model).To(Equal("qwen3-coder:30b"))
Expect(resp.Message.Role).To(Equal("assistant"))
Expect(resp.Message.GetText()).To(Equal("Here is the test plan."))
Expect(resp.Usage).NotTo(BeNil())
Expect(resp.Usage.PromptTokens).To(Equal(100))
Expect(resp.Usage.CompletionTokens).To(Equal(50))
})
})

Describe("ParseRequest with tool calls", func() {
It("parses tool calls in assistant messages", func() {
payload := []byte(`{
Expand Down
193 changes: 2 additions & 191 deletions pkg/llm/provider/openai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
package openai

import (
"encoding/json"
"time"

"github.com/papercomputeco/tapes/pkg/llm"
)

Expand All @@ -23,197 +20,11 @@ func (o *Provider) DefaultStreaming() bool {
}

// ParseRequest converts an OpenAI chat-completion request payload into the
// provider-agnostic llm.ChatRequest. The conversion logic lives in the
// package-level ParseRequestPayload so other providers (e.g. ollama) can
// fall back to OpenAI-format parsing without constructing a Provider.
//
// NOTE(review): the rendered span contained both the deleted pre-refactor
// body and the new delegating return (diff residue: two returns, the second
// unreachable). Collapsed to the post-diff form per the "2 additions & 191
// deletions" summary.
func (o *Provider) ParseRequest(payload []byte) (*llm.ChatRequest, error) {
	return ParseRequestPayload(payload)
}

// ParseResponse converts an OpenAI chat-completion response payload into the
// provider-agnostic llm.ChatResponse. It delegates to the package-level
// ParseResponsePayload so the conversion is reusable by providers that fall
// back to OpenAI-format parsing (e.g. ollama).
//
// NOTE(review): the rendered span contained both the deleted pre-refactor
// body and the new delegating return (diff residue: two returns, the second
// unreachable). Collapsed to the post-diff form per the "2 additions & 191
// deletions" summary.
func (o *Provider) ParseResponse(payload []byte) (*llm.ChatResponse, error) {
	return ParseResponsePayload(payload)
}

func (o *Provider) ParseStreamChunk(_ []byte) (*llm.StreamChunk, error) {
Expand Down
Loading
Loading