Update myollama.py

myollama.py  CHANGED  (+30 -150)
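In summary: this commit replaces the async-capable Ollama client with the synchronous-only implementation from llama_index.legacy. The class docstring, the json_mode field and its `if self.json_mode:` payload tweaks, and the async methods achat, acomplete, and astream_complete are dropped; module-level imports, a DEFAULT_REQUEST_TIMEOUT constant, and a local get_addtional_kwargs helper (the legacy module's spelling) come in, replacing the previous get_additional_kwargs calls. Async entry points should still resolve if the legacy CustomLLM base behaves as upstream, where the default async methods delegate to the sync implementations.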
@@ -1,25 +1,33 @@
-class Ollama(CustomLLM):
-    """Ollama LLM.
-
-    Visit https://ollama.com/ to download and install Ollama.
+import json
+from typing import Any, Dict, Sequence, Tuple

-    Run `ollama serve` to start a server.
+import httpx
+from httpx import Timeout

-    Run `ollama pull <name>` to download a model to run.
+from llama_index.legacy.bridge.pydantic import Field
+from llama_index.legacy.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
+from llama_index.legacy.core.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+)
+from llama_index.legacy.llms.base import llm_chat_callback, llm_completion_callback
+from llama_index.legacy.llms.custom import CustomLLM

-    Examples:
-        `pip install llama-index-llms-ollama`
+DEFAULT_REQUEST_TIMEOUT = 30.0

-        ```python
-        from llama_index.llms.ollama import Ollama

-        llm = Ollama(model="llama2", request_timeout=60.0)
+def get_addtional_kwargs(
+    response: Dict[str, Any], exclude: Tuple[str, ...]
+) -> Dict[str, Any]:
+    return {k: v for k, v in response.items() if k not in exclude}

-        response = llm.complete("What is the capital of France?")
-        print(response)
-        ```
-    """

+class Ollama(CustomLLM):
     base_url: str = Field(
         default="http://localhost:11434",
         description="Base url the model is hosted under.",
@@ -43,10 +51,6 @@ class Ollama(CustomLLM):
     prompt_key: str = Field(
         default="prompt", description="The key to use for the prompt in API calls."
     )
-    json_mode: bool = Field(
-        default=False,
-        description="Whether to use JSON mode for the Ollama API.",
-    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional model parameters for the Ollama API.",
@@ -94,9 +98,6 @@ class Ollama(CustomLLM):
             **kwargs,
         }

-        if self.json_mode:
-            payload["format"] = "json"
-
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             response = client.post(
                 url=f"{self.base_url}/api/chat",
@@ -109,12 +110,12 @@ class Ollama(CustomLLM):
                 message=ChatMessage(
                     content=message.get("content"),
                     role=MessageRole(message.get("role")),
-                    additional_kwargs=get_additional_kwargs(
+                    additional_kwargs=get_addtional_kwargs(
                         message, ("content", "role")
                     ),
                 ),
                 raw=raw,
-                additional_kwargs=get_additional_kwargs(raw, ("message",)),
+                additional_kwargs=get_addtional_kwargs(raw, ("message",)),
             )

     @llm_chat_callback()
@@ -136,9 +137,6 @@ class Ollama(CustomLLM):
             **kwargs,
         }

-        if self.json_mode:
-            payload["format"] = "json"
-
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             with client.stream(
                 method="POST",
@@ -159,59 +157,15 @@ class Ollama(CustomLLM):
                             message=ChatMessage(
                                 content=text,
                                 role=MessageRole(message.get("role")),
-                                additional_kwargs=get_additional_kwargs(
+                                additional_kwargs=get_addtional_kwargs(
                                     message, ("content", "role")
                                 ),
                             ),
                             delta=delta,
                             raw=chunk,
-                            additional_kwargs=get_additional_kwargs(
-                                chunk, ("message",)
-                            ),
+                            additional_kwargs=get_addtional_kwargs(chunk, ("message",)),
                         )

-    @llm_chat_callback()
-    async def achat(
-        self, messages: Sequence[ChatMessage], **kwargs: Any
-    ) -> ChatResponseAsyncGen:
-        payload = {
-            "model": self.model,
-            "messages": [
-                {
-                    "role": message.role.value,
-                    "content": message.content,
-                    **message.additional_kwargs,
-                }
-                for message in messages
-            ],
-            "options": self._model_kwargs,
-            "stream": False,
-            **kwargs,
-        }
-
-        if self.json_mode:
-            payload["format"] = "json"
-
-        async with httpx.AsyncClient(timeout=Timeout(self.request_timeout)) as client:
-            response = await client.post(
-                url=f"{self.base_url}/api/chat",
-                json=payload,
-            )
-            response.raise_for_status()
-            raw = response.json()
-            message = raw["message"]
-            return ChatResponse(
-                message=ChatMessage(
-                    content=message.get("content"),
-                    role=MessageRole(message.get("role")),
-                    additional_kwargs=get_additional_kwargs(
-                        message, ("content", "role")
-                    ),
-                ),
-                raw=raw,
-                additional_kwargs=get_additional_kwargs(raw, ("message",)),
-            )
-
     @llm_completion_callback()
     def complete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
@@ -224,9 +178,6 @@ class Ollama(CustomLLM):
             **kwargs,
         }

-        if self.json_mode:
-            payload["format"] = "json"
-
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             response = client.post(
                 url=f"{self.base_url}/api/generate",
@@ -238,36 +189,7 @@ class Ollama(CustomLLM):
             return CompletionResponse(
                 text=text,
                 raw=raw,
-                additional_kwargs=get_additional_kwargs(raw, ("response",)),
-            )
-
-    @llm_completion_callback()
-    async def acomplete(
-        self, prompt: str, formatted: bool = False, **kwargs: Any
-    ) -> CompletionResponse:
-        payload = {
-            self.prompt_key: prompt,
-            "model": self.model,
-            "options": self._model_kwargs,
-            "stream": False,
-            **kwargs,
-        }
-
-        if self.json_mode:
-            payload["format"] = "json"
-
-        async with httpx.AsyncClient(timeout=Timeout(self.request_timeout)) as client:
-            response = await client.post(
-                url=f"{self.base_url}/api/generate",
-                json=payload,
-            )
-            response.raise_for_status()
-            raw = response.json()
-            text = raw.get("response")
-            return CompletionResponse(
-                text=text,
-                raw=raw,
-                additional_kwargs=get_additional_kwargs(raw, ("response",)),
+                additional_kwargs=get_addtional_kwargs(raw, ("response",)),
             )

     @llm_completion_callback()
@@ -282,9 +204,6 @@ class Ollama(CustomLLM):
             **kwargs,
         }

-        if self.json_mode:
-            payload["format"] = "json"
-
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             with client.stream(
                 method="POST",
@@ -302,46 +221,7 @@ class Ollama(CustomLLM):
                             delta=delta,
                             text=text,
                             raw=chunk,
-                            additional_kwargs=get_additional_kwargs(
+                            additional_kwargs=get_addtional_kwargs(
                                 chunk, ("response",)
                             ),
-                        )
-
-    @llm_completion_callback()
-    async def astream_complete(
-        self, prompt: str, formatted: bool = False, **kwargs: Any
-    ) -> CompletionResponseAsyncGen:
-        payload = {
-            self.prompt_key: prompt,
-            "model": self.model,
-            "options": self._model_kwargs,
-            "stream": True,
-            **kwargs,
-        }
-
-        if self.json_mode:
-            payload["format"] = "json"
-
-        async def gen() -> CompletionResponseAsyncGen:
-            async with httpx.AsyncClient(
-                timeout=Timeout(self.request_timeout)
-            ) as client:
-                async with client.stream(
-                    method="POST",
-                    url=f"{self.base_url}/api/generate",
-                    json=payload,
-                ) as response:
-                    async for line in response.aiter_lines():
-                        if line:
-                            chunk = json.loads(line)
-                            delta = chunk.get("response")
-                            yield CompletionResponse(
-                                delta=delta,
-                                text=delta,
-                                raw=chunk,
-                                additional_kwargs=get_additional_kwargs(
-                                    chunk, ("response",)
-                                ),
-                            )
-
-        return gen()
+                        )
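For quick reference, here is a minimal usage sketch of the class as it stands after this commit. The import path (`myollama`), the model name, and a locally running Ollama server with the model already pulled are assumptions for illustration, not anything the diff itself guarantees.

```python
# Minimal sketch: the sync-only client after this commit.
# Assumes myollama.py is importable and an Ollama server is listening
# on the default base_url (http://localhost:11434) with "llama2" pulled.
from myollama import Ollama

llm = Ollama(model="llama2", request_timeout=60.0)

# Blocking completion -> POST /api/generate
response = llm.complete("What is the capital of France?")
print(response)

# Streaming completion: each chunk carries the incremental delta
for chunk in llm.stream_complete("Name three rivers in France."):
    print(chunk.delta, end="", flush=True)
```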
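Chat-style calls go through `POST /api/chat` and take `ChatMessage` objects. Continuing the sketch above (the message contents are illustrative):

```python
# Chat sketch: messages are serialized to {"role": ..., "content": ...}
# dicts in the payload built by Ollama.chat() / Ollama.stream_chat().
from llama_index.legacy.core.llms.types import ChatMessage, MessageRole

messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="Answer in one sentence."),
    ChatMessage(role=MessageRole.USER, content="Why is the sky blue?"),
]
print(llm.chat(messages))  # blocking chat
for chunk in llm.stream_chat(messages):  # streaming chat
    print(chunk.delta, end="", flush=True)
```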
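The removed `json_mode` flag has no direct replacement in this file, but because every request payload spreads `**kwargs` into the JSON body, passing `format="json"` per call should reproduce the old behavior. A workaround sketch, not a documented interface:

```python
# json_mode workaround sketch: **kwargs flows into the request payload,
# so this injects {"format": "json"} just like the removed flag did.
response = llm.complete(
    "Reply with a JSON object describing Paris.",
    format="json",  # hypothetical per-call substitute for json_mode
)
print(response.text)
```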