# ollama_llm.py
from typing import Optional, List, Mapping, Any
from langchain.llms.base import LLM
import requests
import json


class Ollama(LLM):
    """Minimal LangChain LLM wrapper around a local Ollama server."""

    model_name: str = "llama2"
    base_url: str = "http://localhost:11434"
    max_tokens: int = 512
    temperature: float = 0.7
    top_p: float = 1.0

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        url = f"{self.base_url}/api/generate"
        payload = {
            "model": self.model_name,
            "prompt": prompt,
            "options": {
                # Ollama's generate API names the token limit "num_predict"
                "num_predict": self.max_tokens,
                "temperature": self.temperature,
                "top_p": self.top_p,
            },
        }
        # Send the request with streaming enabled; Ollama emits one JSON
        # object per line while it generates.
        response = requests.post(url, json=payload, stream=True)
        response.raise_for_status()

        generated_text = ""
        for line in response.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                # Skip any line that isn't valid JSON.
                continue
            chunk = data.get("response", "")
            generated_text += chunk
            # Handle stop tokens: truncate at the first occurrence of any
            # stop token, then stop reading the stream.
            if stop and any(token in chunk for token in stop):
                for token in stop:
                    if token in generated_text:
                        generated_text = generated_text.split(token)[0]
                        break
                break
        return generated_text.strip()
    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {
            "model_name": self.model_name,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
        }

    @property
    def _llm_type(self) -> str:
        return "ollama"