dh-mc committed on
Commit
ec243e6
·
1 Parent(s): 6011708

fixed streaming issue

Browse files
Files changed (3) hide show
  1. app_modules/init.py +2 -2
  2. app_modules/llm_loader.py +5 -2
  3. server.py +1 -1
app_modules/init.py CHANGED
@@ -23,7 +23,7 @@ load_dotenv(found_dotenv, override=False)
23
  init_settings()
24
 
25
 
26
- def app_init(lc_serve: bool = False):
27
  # https://github.com/huggingface/transformers/issues/17611
28
  os.environ["CURL_CA_BUNDLE"] = ""
29
 
@@ -69,7 +69,7 @@ def app_init(lc_serve: bool = False):
69
  print(f"Completed in {end - start:.3f}s")
70
 
71
  start = timer()
72
- llm_loader = LLMLoader(llm_model_type, lc_serve)
73
  llm_loader.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
74
  qa_chain = QAChain(vectorstore, llm_loader)
75
  end = timer()
 
23
  init_settings()
24
 
25
 
26
+ def app_init():
27
  # https://github.com/huggingface/transformers/issues/17611
28
  os.environ["CURL_CA_BUNDLE"] = ""
29
 
 
69
  print(f"Completed in {end - start:.3f}s")
70
 
71
  start = timer()
72
+ llm_loader = LLMLoader(llm_model_type)
73
  llm_loader.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
74
  qa_chain = QAChain(vectorstore, llm_loader)
75
  end = timer()
app_modules/llm_loader.py CHANGED
@@ -91,10 +91,13 @@ class LLMLoader:
91
  max_tokens_limit: int
92
  lock: any
93
 
94
- def __init__(self, llm_model_type, lc_serve: bool = False):
95
  self.llm_model_type = llm_model_type
96
  self.llm = None
97
- self.streamer = TextIteratorStreamer("")
 
 
 
98
  self.max_tokens_limit = 2048
99
  self.search_kwargs = {"k": 4}
100
  self.lock = threading.Lock()
 
91
  max_tokens_limit: int
92
  lock: any
93
 
94
+ def __init__(self, llm_model_type):
95
  self.llm_model_type = llm_model_type
96
  self.llm = None
97
+ self.streamer = TextIteratorStreamer(
98
+ "",
99
+ for_huggingface=True,
100
+ )
101
  self.max_tokens_limit = 2048
102
  self.search_kwargs = {"k": 4}
103
  self.lock = threading.Lock()
server.py CHANGED
@@ -11,7 +11,7 @@ from app_modules.init import app_init
11
  from app_modules.llm_chat_chain import ChatChain
12
  from app_modules.utils import print_llm_response
13
 
14
- llm_loader, qa_chain = app_init(__name__ != "__main__")
15
 
16
  chat_history_enabled = os.environ.get("CHAT_HISTORY_ENABLED") == "true"
17
 
 
11
  from app_modules.llm_chat_chain import ChatChain
12
  from app_modules.utils import print_llm_response
13
 
14
+ llm_loader, qa_chain = app_init()
15
 
16
  chat_history_enabled = os.environ.get("CHAT_HISTORY_ENABLED") == "true"
17