Update myollama.py
myollama.py (+167 -15)
This commit ports myollama.py, a local copy of LlamaIndex's Ollama integration, from the llama_index.legacy package to llama_index.core. The legacy imports (llama_index.legacy.bridge.pydantic, llama_index.legacy.constants, llama_index.legacy.core.llms.types) are swapped for their llama_index.core equivalents, a class docstring and a json_mode field are added, and the chat and completion methods gain async counterparts (achat, acomplete, astream_complete) built on httpx.AsyncClient. When json_mode is set, every payload is stamped with format: "json" before being posted to the Ollama server.
@@ -1,35 +1,59 @@
+# https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/llms/llama-index-llms-ollama/llama_index/llms/ollama/base.py
+
 import json
 from typing import Any, Dict, Sequence, Tuple
 
 import httpx
 from httpx import Timeout
-
-from llama_index.legacy.bridge.pydantic import Field
-from llama_index.legacy.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
-from llama_index.legacy.core.llms.types import (
+from llama_index.core.base.llms.types import (
     ChatMessage,
     ChatResponse,
     ChatResponseGen,
+    ChatResponseAsyncGen,
     CompletionResponse,
+    CompletionResponseAsyncGen,
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
 )
-from llama_index.
-from llama_index.
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW, DEFAULT_NUM_OUTPUTS
+from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
+from llama_index.core.llms.custom import CustomLLM
 
 DEFAULT_REQUEST_TIMEOUT = 30.0
 
 
-def
+def get_additional_kwargs(
     response: Dict[str, Any], exclude: Tuple[str, ...]
 ) -> Dict[str, Any]:
     return {k: v for k, v in response.items() if k not in exclude}
 
 
 class Ollama(CustomLLM):
+    """Ollama LLM.
+
+    Visit https://ollama.com/ to download and install Ollama.
+
+    Run `ollama serve` to start a server.
+
+    Run `ollama pull <name>` to download a model to run.
+
+    Examples:
+        `pip install llama-index-llms-ollama`
+
+        ```python
+        from llama_index.llms.ollama import Ollama
+
+        llm = Ollama(model="llama2", request_timeout=60.0)
+
+        response = llm.complete("What is the capital of France?")
+        print(response)
+        ```
+    """
+
     base_url: str = Field(
-        default="http://localhost:
+        default="http://localhost:11435",
         description="Base url the model is hosted under.",
     )
     model: str = Field(description="The Ollama model to use.")
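The committed default base_url points at port 11435, one off from Ollama's usual 11434, so the Space presumably runs its server on a non-standard port. The field can be overridden per instance; a minimal sketch, assuming this Space exposes the class as myollama.Ollama:

```python
from myollama import Ollama  # hypothetical import path for this Space

# Point at a stock Ollama install on the standard port instead of the committed default.
llm = Ollama(model="llama2", base_url="http://localhost:11434")
```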
@@ -51,6 +75,10 @@ class Ollama(CustomLLM):
     prompt_key: str = Field(
         default="prompt", description="The key to use for the prompt in API calls."
     )
+    json_mode: bool = Field(
+        default=False,
+        description="Whether to use JSON mode for the Ollama API.",
+    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional model parameters for the Ollama API.",
@@ -98,6 +126,9 @@ class Ollama(CustomLLM):
             **kwargs,
         }
 
+        if self.json_mode:
+            payload["format"] = "json"
+
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             response = client.post(
                 url=f"{self.base_url}/api/chat",
@@ -110,12 +141,12 @@
                 message=ChatMessage(
                     content=message.get("content"),
                     role=MessageRole(message.get("role")),
-                    additional_kwargs=
+                    additional_kwargs=get_additional_kwargs(
                         message, ("content", "role")
                     ),
                 ),
                 raw=raw,
-                additional_kwargs=
+                additional_kwargs=get_additional_kwargs(raw, ("message",)),
             )
 
     @llm_chat_callback()
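Because get_additional_kwargs keeps every key of the raw reply except the ones already consumed, per-call metadata from Ollama survives on the response object. A sketch of reading it back; the field names are what Ollama's /api/chat is documented to return, not something this file guarantees:

```python
from llama_index.core.base.llms.types import ChatMessage

from myollama import Ollama  # hypothetical import path for this Space

llm = Ollama(model="llama2")
resp = llm.chat([ChatMessage(role="user", content="Hello!")])

# Everything but "message" from the raw Ollama reply lands here,
# e.g. token counts and timings when the server reports them.
print(resp.additional_kwargs.get("eval_count"))
print(resp.additional_kwargs.get("total_duration"))
```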
@@ -137,6 +168,9 @@
             **kwargs,
         }
 
+        if self.json_mode:
+            payload["format"] = "json"
+
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             with client.stream(
                 method="POST",
@@ -157,15 +191,59 @@
                                 message=ChatMessage(
                                     content=text,
                                     role=MessageRole(message.get("role")),
-                                    additional_kwargs=
+                                    additional_kwargs=get_additional_kwargs(
                                         message, ("content", "role")
                                     ),
                                 ),
                                 delta=delta,
                                 raw=chunk,
-                                additional_kwargs=
+                                additional_kwargs=get_additional_kwargs(
+                                    chunk, ("message",)
+                                ),
                             )
 
+    @llm_chat_callback()
+    async def achat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponse:
+        payload = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": message.role.value,
+                    "content": message.content,
+                    **message.additional_kwargs,
+                }
+                for message in messages
+            ],
+            "options": self._model_kwargs,
+            "stream": False,
+            **kwargs,
+        }
+
+        if self.json_mode:
+            payload["format"] = "json"
+
+        async with httpx.AsyncClient(timeout=Timeout(self.request_timeout)) as client:
+            response = await client.post(
+                url=f"{self.base_url}/api/chat",
+                json=payload,
+            )
+            response.raise_for_status()
+            raw = response.json()
+            message = raw["message"]
+            return ChatResponse(
+                message=ChatMessage(
+                    content=message.get("content"),
+                    role=MessageRole(message.get("role")),
+                    additional_kwargs=get_additional_kwargs(
+                        message, ("content", "role")
+                    ),
+                ),
+                raw=raw,
+                additional_kwargs=get_additional_kwargs(raw, ("message",)),
+            )
+
     @llm_completion_callback()
     def complete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
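achat builds the same payload as chat but posts it through httpx.AsyncClient, and it returns a plain ChatResponse, so it can be awaited directly. A usage sketch under the same myollama import assumption:

```python
import asyncio

from llama_index.core.base.llms.types import ChatMessage

from myollama import Ollama  # hypothetical import path for this Space


async def main() -> None:
    llm = Ollama(model="llama2", request_timeout=60.0)
    resp = await llm.achat([ChatMessage(role="user", content="Say hi in French.")])
    print(resp.message.content)


asyncio.run(main())
```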
@@ -178,6 +256,9 @@
             **kwargs,
         }
 
+        if self.json_mode:
+            payload["format"] = "json"
+
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             response = client.post(
                 url=f"{self.base_url}/api/generate",
@@ -189,7 +270,36 @@
             return CompletionResponse(
                 text=text,
                 raw=raw,
-                additional_kwargs=
+                additional_kwargs=get_additional_kwargs(raw, ("response",)),
+            )
+
+    @llm_completion_callback()
+    async def acomplete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
+        payload = {
+            self.prompt_key: prompt,
+            "model": self.model,
+            "options": self._model_kwargs,
+            "stream": False,
+            **kwargs,
+        }
+
+        if self.json_mode:
+            payload["format"] = "json"
+
+        async with httpx.AsyncClient(timeout=Timeout(self.request_timeout)) as client:
+            response = await client.post(
+                url=f"{self.base_url}/api/generate",
+                json=payload,
+            )
+            response.raise_for_status()
+            raw = response.json()
+            text = raw.get("response")
+            return CompletionResponse(
+                text=text,
+                raw=raw,
+                additional_kwargs=get_additional_kwargs(raw, ("response",)),
             )
 
     @llm_completion_callback()
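Since acomplete suspends while the POST is in flight, several completions can share one event loop instead of blocking each other the way the synchronous complete does. A sketch, assuming the Ollama server accepts overlapping requests:

```python
import asyncio

from myollama import Ollama  # hypothetical import path for this Space


async def main() -> None:
    llm = Ollama(model="llama2", request_timeout=120.0)
    # Each call opens its own AsyncClient, so the requests can overlap.
    answers = await asyncio.gather(
        llm.acomplete("Capital of France?"),
        llm.acomplete("Capital of Japan?"),
    )
    for answer in answers:
        print(answer.text)


asyncio.run(main())
```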
@@ -204,6 +314,9 @@
             **kwargs,
         }
 
+        if self.json_mode:
+            payload["format"] = "json"
+
         with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
             with client.stream(
                 method="POST",
@@ -221,7 +334,46 @@
                                 delta=delta,
                                 text=text,
                                 raw=chunk,
-                                additional_kwargs=
+                                additional_kwargs=get_additional_kwargs(
                                     chunk, ("response",)
                                 ),
-        )
+                            )
+
+    @llm_completion_callback()
+    async def astream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseAsyncGen:
+        payload = {
+            self.prompt_key: prompt,
+            "model": self.model,
+            "options": self._model_kwargs,
+            "stream": True,
+            **kwargs,
+        }
+
+        if self.json_mode:
+            payload["format"] = "json"
+
+        async def gen() -> CompletionResponseAsyncGen:
+            async with httpx.AsyncClient(
+                timeout=Timeout(self.request_timeout)
+            ) as client:
+                async with client.stream(
+                    method="POST",
+                    url=f"{self.base_url}/api/generate",
+                    json=payload,
+                ) as response:
+                    async for line in response.aiter_lines():
+                        if line:
+                            chunk = json.loads(line)
+                            delta = chunk.get("response")
+                            yield CompletionResponse(
+                                delta=delta,
+                                text=delta,
+                                raw=chunk,
+                                additional_kwargs=get_additional_kwargs(
+                                    chunk, ("response",)
+                                ),
+                            )
+
+        return gen()
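astream_complete is awaited once to obtain the async generator, then iterated with async for. Note that, unlike the synchronous stream_complete above, which yields a cumulative text alongside each delta, each chunk here carries only the delta in both fields, so callers who want the full string must accumulate it themselves; a consumption sketch under the same import assumption:

```python
import asyncio

from myollama import Ollama  # hypothetical import path for this Space


async def main() -> None:
    llm = Ollama(model="llama2", request_timeout=120.0)
    gen = await llm.astream_complete("Write a haiku about autumn.")
    text = ""
    async for chunk in gen:
        text += chunk.delta or ""  # chunk.text also holds only the delta here
        print(chunk.delta or "", end="", flush=True)
    print()


asyncio.run(main())
```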