# clis-wrapper/message_adapter.py (Labiri/clis-wrapper, main branch)

from typing import List, Optional, Dict, Any, Union
from models import Message
import re


class MessageAdapter:
    """Converts between OpenAI message format and Claude Code prompts.

    All methods are static; the class is only a namespace. Messages are
    expected to expose ``role`` and ``content`` attributes.
    """

    # An element with a matching closing tag, e.g. <foo a="1">...</foo>.
    _XML_ELEMENT_RE = re.compile(
        r'<([a-zA-Z_][\w\-\.]*)(\s[^>]*)?>.*?</\1>', re.DOTALL
    )

    # Substrings that mark content as structured regardless of XML/JSON checks.
    _STRUCTURED_INDICATORS = (
        '```',  # Code blocks
        '<?xml',  # XML declaration
        '<!DOCTYPE',  # HTML/XML doctype
    )

    # Prompt speaker label for each supported conversational role.
    _SPEAKERS = {"user": "Human", "assistant": "Assistant"}

    _THINKING_RE = re.compile(r'<thinking>.*?</thinking>', re.DOTALL)
    _ATTEMPT_COMPLETION_RE = re.compile(
        r'<attempt_completion>(.*?)</attempt_completion>', re.DOTALL
    )
    _RESULT_RE = re.compile(r'<result>(.*?)</result>', re.DOTALL)

    # Tool-usage blocks stripped from responses, applied in this order.
    _TOOL_TAGS = (
        'read_file',
        'write_file',
        'bash',
        'search_files',
        'str_replace_editor',
        'args',
        'ask_followup_question',
        'attempt_completion',
        'question',
        'follow_up',
        'suggest',
    )
    _TOOL_BLOCK_RES = tuple(
        re.compile(rf'<{tag}>.*?</{tag}>', re.DOTALL) for tag in _TOOL_TAGS
    )

    # Three or more newlines (optionally separated by whitespace).
    _EXTRA_BLANK_LINES_RE = re.compile(r'\n\s*\n\s*\n')

    # Opening tag made of lowercase letters/underscores, e.g. <read_file>.
    _GENERIC_TOOL_TAG_RE = re.compile(r'<([a-z][a-z_]*[a-z]|[a-z])>')

    _EMPTY_RESPONSE_FALLBACK = (
        "I understand you're testing the system. How can I help you today?"
    )

    @staticmethod
    def has_structured_format(content: str) -> bool:
        """
        Detect if content has structured format (XML, JSON, etc).
        Used to determine if content should be preserved as-is.

        Returns True when the content contains an XML-like element with a
        matching closing tag, parses as a JSON object/array, or contains a
        code fence, XML declaration or DOCTYPE. Content shorter than 10
        characters is never considered structured.
        """
        if not content or len(content) < 10:
            return False

        if MessageAdapter._XML_ELEMENT_RE.search(content):
            return True

        stripped = content.strip()
        looks_like_json = (
            (stripped.startswith('{') and stripped.endswith('}'))
            or (stripped.startswith('[') and stripped.endswith(']'))
        )
        if looks_like_json:
            import json
            try:
                json.loads(stripped)
            except (ValueError, RecursionError):
                # Not valid JSON (JSONDecodeError is a ValueError) or nested
                # too deeply to parse; fall through to the indicator check.
                pass
            else:
                return True

        return any(
            marker in content
            for marker in MessageAdapter._STRUCTURED_INDICATORS
        )

    @staticmethod
    def _join_turns(turns: List[str], messages: List["Message"]) -> str:
        """Join formatted turns into a prompt.

        When the last message is not from the user, a "Please continue."
        human turn is added so the prompt always ends on a human turn. It is
        added as a turn (not concatenated) so a conversation without any
        user/assistant turns does not start with blank lines.
        """
        if messages and messages[-1].role != "user":
            turns = turns + ["Human: Please continue."]
        return "\n\n".join(turns)

    @staticmethod
    def messages_to_prompt(messages: List["Message"]) -> tuple[str, Optional[str]]:
        """
        Convert OpenAI messages to Claude Code prompt format.
        Returns (prompt, system_prompt)

        The last system message wins as the system prompt. User and
        assistant messages become "Human: ..." / "Assistant: ..." turns
        separated by blank lines; other roles are skipped. List-style content
        is flattened to text (image parts are rendered with the same
        "[Image: Failed to process]" marker the image-aware variant uses for
        unmapped images) and ``None`` content becomes an empty string.
        """
        system_prompt = None
        turns = []

        for message in messages:
            if message.role == "system":
                # Use the last system message as the system prompt
                system_prompt = message.content
                continue

            speaker = MessageAdapter._SPEAKERS.get(message.role)
            if speaker is None:
                continue

            # No image mappings are available on this path; the throwaway
            # list receives (and discards) any referenced paths.
            text = MessageAdapter._process_content_with_images(
                message.content, {}, []
            )
            turns.append(f"{speaker}: {text}")

        return MessageAdapter._join_turns(turns, messages), system_prompt

    @staticmethod
    def messages_to_prompt_with_images(
        messages: List["Message"],
        image_mappings: Dict[str, str]
    ) -> tuple[str, Optional[str], List[str]]:
        """
        Convert OpenAI messages to Claude Code prompt format with image references.

        Args:
            messages: List of Message objects
            image_mappings: Dictionary mapping image URLs to local file paths

        Returns:
            Tuple of (prompt, system_prompt, list_of_image_paths). The image
            paths are de-duplicated and kept in first-referenced order.
        """
        system_prompt = None
        turns = []
        referenced_images: List[str] = []

        for message in messages:
            if message.role == "system":
                # Use the last system message as the system prompt
                system_prompt = message.content
                continue

            # Content is processed for every non-system role, so images are
            # collected even from messages whose role is not rendered.
            text = MessageAdapter._process_content_with_images(
                message.content,
                image_mappings,
                referenced_images
            )

            speaker = MessageAdapter._SPEAKERS.get(message.role)
            if speaker is not None:
                turns.append(f"{speaker}: {text}")

        prompt = MessageAdapter._join_turns(turns, messages)

        # dict.fromkeys de-duplicates while keeping a deterministic order.
        unique_image_paths = list(dict.fromkeys(referenced_images))

        return prompt, system_prompt, unique_image_paths

    @staticmethod
    def _process_content_with_images(
        content: Union[str, List[Any], None],
        image_mappings: Dict[str, str],
        referenced_images: List[str]
    ) -> str:
        """
        Process message content to include image file references.

        Args:
            content: Message content (string, list of content parts, or None)
            image_mappings: URL to file path mappings
            referenced_images: List to append referenced image paths to
                (mutated in place)

        Returns:
            Processed content string with image references. Strings are
            returned unchanged and None yields an empty string. For lists,
            text parts and image markers are joined with single spaces;
            parts that are not dicts, or have another type, are skipped.
        """
        if isinstance(content, str):
            return content
        if content is None:
            return ""

        rendered = []

        for part in content:
            if not isinstance(part, dict):
                continue

            part_type = part.get('type')
            if part_type == 'text':
                # A null "text" value is treated as empty text.
                rendered.append(part.get('text') or '')
            elif part_type == 'image_url':
                # Normally {"url": ...}; a bare string is accepted as the
                # URL itself, and anything else counts as a missing URL.
                image_ref = part.get('image_url')
                if isinstance(image_ref, dict):
                    image_url = image_ref.get('url', '')
                elif isinstance(image_ref, str):
                    image_url = image_ref
                else:
                    image_url = ''

                if isinstance(image_url, str) and image_url in image_mappings:
                    file_path = image_mappings[image_url]
                    rendered.append(f"[Image: {file_path}]")
                    referenced_images.append(file_path)
                else:
                    # Image wasn't processed (maybe failed to download)
                    rendered.append("[Image: Failed to process]")

        return " ".join(rendered)

    @staticmethod
    def filter_content(content: str) -> str:
        """
        Filter content for unsupported features and tool usage.

        Steps, in order:
          1. Remove <thinking> blocks.
          2. If an <attempt_completion> block with non-empty content exists,
             the response becomes the content of the first such block (or of
             its first <result> element when present).
          3. Otherwise strip known tool-usage blocks, including an empty
             <attempt_completion> block.
          4. Collapse runs of blank lines and trim surrounding whitespace.

        Image references are left untouched. Falsy input is returned as-is;
        if filtering leaves nothing, a fixed fallback sentence is returned.
        """
        if not content:
            return content

        content = MessageAdapter._THINKING_RE.sub('', content)

        extracted = ""
        completion = MessageAdapter._ATTEMPT_COMPLETION_RE.search(content)
        if completion:
            extracted = completion.group(1).strip()
            result = MessageAdapter._RESULT_RE.search(extracted)
            if result:
                extracted = result.group(1).strip()

        if extracted:
            content = extracted
        else:
            # Also reached when the attempt_completion block is empty, so
            # its bare tags are removed rather than leaked to the caller.
            for tool_block in MessageAdapter._TOOL_BLOCK_RES:
                content = tool_block.sub('', content)

        content = MessageAdapter._EXTRA_BLANK_LINES_RE.sub('\n\n', content)
        content = content.strip()

        if not content:
            return MessageAdapter._EMPTY_RESPONSE_FALLBACK

        return content

    @staticmethod
    def format_claude_response(content: str, model: str, finish_reason: str = "stop") -> Dict[str, Any]:
        """Format Claude response for OpenAI compatibility.

        Returns a dict with the keys "role" (always "assistant"), "content",
        "finish_reason" and "model".
        """
        return {
            "role": "assistant",
            "content": content,
            "finish_reason": finish_reason,
            "model": model
        }

    @staticmethod
    def estimate_tokens(text: str) -> int:
        """
        Rough estimation of token count.
        OpenAI's rule of thumb: ~4 characters per token for English text.

        Returns the character count divided by four, rounded down.
        """
        return len(text) // 4

    @staticmethod
    def validate_xml_tool_response(content: str) -> bool:
        """Check if content contains an XML tool-like opening tag.

        Returns True when the content contains ``<name>`` for any name
        returned by ``get_known_xml_tools()`` (compared against the
        lower-cased content), or any opening tag made only of lowercase
        letters and underscores. Closing tags and well-formedness are not
        checked. Empty content returns False.
        """
        if not content:
            return False

        # Kept as a function-level import so xml_tools_config is only loaded
        # when this check actually runs.
        from xml_tools_config import get_known_xml_tools

        lowered = content.lower()
        if any(f"<{tool}>" in lowered for tool in get_known_xml_tools()):
            return True

        # The generic pattern is matched against the original text, so only
        # genuinely lowercase tags count here.
        return bool(MessageAdapter._GENERIC_TOOL_TAG_RE.search(content))