alazarchuk committed
Commit 4c2ac8a · 1 Parent(s): 5b11730

Add ability to pass additional parameters to the ollama library, such as host and timeout

.gitignore CHANGED
@@ -4,4 +4,5 @@ dickens/
 book.txt
 lightrag-dev/
 .idea/
-dist/
+dist/
+.venv/
examples/lightrag_ollama_demo.py CHANGED
@@ -1,4 +1,7 @@
 import os
+import logging
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
 
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
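
For reference, a minimal sketch of how the demo could pass the new options through to the ollama client once this commit lands. The host URL, timeout value, model names, and the EmbeddingFunc import path are illustrative assumptions, not part of this diff:

from lightrag import LightRAG
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc  # assumed location of EmbeddingFunc

# Sketch only: host, timeout, and model names below are placeholders.
rag = LightRAG(
    working_dir="./dickens",
    llm_model_func=ollama_model_complete,
    llm_model_name="mistral:7b",
    llm_model_kwargs={"host": "http://localhost:11434", "timeout": 300},
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(
            texts,
            embed_model="nomic-embed-text",
            host="http://localhost:11434",
            timeout=300,
        ),
    ),
)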
lightrag/lightrag.py CHANGED
@@ -88,6 +88,7 @@ class LightRAG:
     llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
+    llm_model_kwargs: dict = field(default_factory=dict)
 
     # storage
     key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -154,7 +155,7 @@ class LightRAG:
         )
 
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+            partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
         )
 
     def insert(self, string_or_strings):
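
The constructor change relies on functools.partial pre-binding keyword arguments: everything in llm_model_kwargs is unpacked with ** once, then delivered to every later call of the model function without callers repeating it. A tiny self-contained sketch of that binding behaviour, using a made-up stand-in function and placeholder values:

from functools import partial

def model_func(prompt, hashing_kv=None, host=None, timeout=None, **kwargs):
    # Stand-in for the real LLM callable; only demonstrates argument binding.
    return f"host={host}, timeout={timeout}, prompt={prompt}"

llm_model_kwargs = {"host": "http://localhost:11434", "timeout": 300}
bound = partial(model_func, hashing_kv="response_cache", **llm_model_kwargs)

print(bound("Hello"))  # host and timeout arrive without being passed at the call site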
lightrag/llm.py CHANGED
@@ -222,8 +222,10 @@ async def ollama_model_if_cache(
 ) -> str:
     kwargs.pop("max_tokens", None)
     kwargs.pop("response_format", None)
+    host = kwargs.pop("host", None)
+    timeout = kwargs.pop("timeout", None)
 
-    ollama_client = ollama.AsyncClient()
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
@@ -415,10 +417,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
     return embeddings.detach().numpy()
 
 
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     embed_text = []
+    ollama_client = ollama.Client(**kwargs)
     for text in texts:
-        data = ollama.embeddings(model=embed_model, prompt=text)
+        data = ollama_client.embeddings(model=embed_model, prompt=text)
         embed_text.append(data["embedding"])
 
     return embed_text
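
With this change, ollama_embedding forwards any extra keyword arguments straight into ollama.Client, so host and timeout can be set per call. A hedged usage sketch, assuming a locally running Ollama server and the nomic-embed-text model (both placeholders):

import asyncio

from lightrag.llm import ollama_embedding

async def main():
    # Sketch only: host URL, timeout, and model name are illustrative.
    embeddings = await ollama_embedding(
        ["a short test sentence"],
        embed_model="nomic-embed-text",
        host="http://localhost:11434",
        timeout=60,
    )
    print(len(embeddings), len(embeddings[0]))

asyncio.run(main())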