fixed bugs for tgi
Browse files
- .env.example +1 -1
- app_modules/llm_inference.py +15 -13
- app_modules/llm_loader.py +0 -3
.env.example
CHANGED
@@ -30,7 +30,7 @@ DISABLE_MODEL_PRELOADING=true
|
|
30 |
CHAT_HISTORY_ENABLED=true
|
31 |
SHOW_PARAM_SETTINGS=false
|
32 |
SHARE_GRADIO_APP=false
|
33 |
-
PDF_FILE_BASE_URL=https://
|
34 |
|
35 |
# if unset, default to "hkunlp/instructor-xl"
|
36 |
HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
|
|
|
30 |
CHAT_HISTORY_ENABLED=true
|
31 |
SHOW_PARAM_SETTINGS=false
|
32 |
SHARE_GRADIO_APP=false
|
33 |
+
PDF_FILE_BASE_URL=https://chat-with-llama-2.netlify.app/pdfs/books/
|
34 |
|
35 |
# if unset, default to "hkunlp/instructor-xl"
|
36 |
HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
|
app_modules/llm_inference.py
CHANGED
@@ -51,7 +51,6 @@ class LLMInference(metaclass=abc.ABCMeta):
|
|
51 |
streaming_handler,
|
52 |
)
|
53 |
if streaming_handler is not None
|
54 |
-
and self.llm_loader.streamer.for_huggingface
|
55 |
else chain(inputs)
|
56 |
)
|
57 |
|
@@ -82,20 +81,23 @@ class LLMInference(metaclass=abc.ABCMeta):
|
|
82 |
)
|
83 |
t.start()
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
|
100 |
t.join()
|
101 |
return que.get()
|
|
|
51 |
streaming_handler,
|
52 |
)
|
53 |
if streaming_handler is not None
|
|
|
54 |
else chain(inputs)
|
55 |
)
|
56 |
|
|
|
81 |
)
|
82 |
t.start()
|
83 |
|
84 |
+
if self.llm_loader.streamer.for_huggingface:
|
85 |
+
count = (
|
86 |
+
2
|
87 |
+
if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
|
88 |
+
else 1
|
89 |
+
)
|
90 |
|
91 |
+
while count > 0:
|
92 |
+
try:
|
93 |
+
for token in self.llm_loader.streamer:
|
94 |
+
streaming_handler.on_llm_new_token(token)
|
95 |
|
96 |
+
self.llm_loader.streamer.reset()
|
97 |
+
count -= 1
|
98 |
+
except Exception:
|
99 |
+
print("nothing generated yet - retry in 0.5s")
|
100 |
+
time.sleep(0.5)
|
101 |
|
102 |
t.join()
|
103 |
return que.get()
|
app_modules/llm_loader.py
CHANGED
@@ -66,9 +66,6 @@ class TextIteratorStreamer(TextStreamer, StreamingStdOutCallbackHandler):
|
|
66 |
self.text_queue.put("\n", timeout=self.timeout)
|
67 |
self.text_queue.put(self.stop_signal, timeout=self.timeout)
|
68 |
|
69 |
-
def for_huggingface(self) -> bool:
|
70 |
-
return self.tokenizer != ""
|
71 |
-
|
72 |
def __iter__(self):
|
73 |
return self
|
74 |
|
|
|
66 |
self.text_queue.put("\n", timeout=self.timeout)
|
67 |
self.text_queue.put(self.stop_signal, timeout=self.timeout)
|
68 |
|
|
|
|
|
|
|
69 |
def __iter__(self):
|
70 |
return self
|
71 |
|