Commit · 4c2ac8a
1 Parent(s): 5b11730

Add ability to pass additional parameters to ollama library like host and timeout

Files changed:
- .gitignore +2 -1
- examples/lightrag_ollama_demo.py +3 -0
- lightrag/lightrag.py +2 -1
- lightrag/llm.py +6 -3
.gitignore CHANGED

@@ -4,4 +4,5 @@ dickens/
 book.txt
 lightrag-dev/
 .idea/
-dist/
+dist/
+.venv/
examples/lightrag_ollama_demo.py CHANGED

@@ -1,4 +1,7 @@
 import os
+import logging
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
 
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
lightrag/lightrag.py CHANGED

@@ -88,6 +88,7 @@ class LightRAG:
     llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct"  #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
+    llm_model_kwargs: dict = field(default_factory=dict)
 
     # storage
     key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage

@@ -154,7 +155,7 @@ class LightRAG:
         )
 
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+            partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
        )
 
     def insert(self, string_or_strings):
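With the new llm_model_kwargs field, a caller can forward client options such as host and timeout from the LightRAG constructor down to the Ollama client. A minimal usage sketch under the assumptions that an Ollama server is reachable at the given URL and that the model name is one you have pulled; the working_dir, model name, host, and timeout values below are illustrative, only llm_model_kwargs itself comes from this commit:

from lightrag import LightRAG
from lightrag.llm import ollama_model_complete

rag = LightRAG(
    working_dir="./dickens",                 # illustrative working directory
    llm_model_func=ollama_model_complete,
    llm_model_name="qwen2",                  # illustrative: any model pulled into Ollama
    llm_model_kwargs={
        "host": "http://localhost:11434",    # forwarded to ollama.AsyncClient(host=...)
        "timeout": 300,                      # forwarded to ollama.AsyncClient(timeout=...)
    },
)

The kwargs are bound via functools.partial in __post_init__, so every completion call made through self.llm_model_func carries them without further plumbing.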
lightrag/llm.py CHANGED

@@ -222,8 +222,10 @@ async def ollama_model_if_cache(
 ) -> str:
     kwargs.pop("max_tokens", None)
     kwargs.pop("response_format", None)
+    host = kwargs.pop("host", None)
+    timeout = kwargs.pop("timeout", None)
 
-    ollama_client = ollama.AsyncClient()
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})

@@ -415,10 +417,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
     return embeddings.detach().numpy()
 
 
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     embed_text = []
+    ollama_client = ollama.Client(**kwargs)
     for text in texts:
-        data = ollama.embeddings(model=embed_model, prompt=text)
+        data = ollama_client.embeddings(model=embed_model, prompt=text)
         embed_text.append(data["embedding"])
 
     return embed_text
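On the embedding side, ollama_embedding now accepts arbitrary keyword arguments and hands them to ollama.Client, so the same host (and timeout) options reach the embedding calls. A hedged sketch of calling it directly, assuming the nomic-embed-text model is available in your Ollama instance; the model name and URL are illustrative, not part of the commit:

import asyncio
from lightrag.llm import ollama_embedding

async def main():
    # host is collected as **kwargs and passed to ollama.Client(**kwargs)
    embeddings = await ollama_embedding(
        ["hello world", "lightrag demo"],
        embed_model="nomic-embed-text",      # illustrative embedding model
        host="http://localhost:11434",       # illustrative Ollama host
    )
    print(len(embeddings), len(embeddings[0]))

asyncio.run(main())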