Daniel Marques committed
Commit e72e226
1 Parent(s): dc8d635

fix: add callback

Files changed (2)
  1. load_models.py +3 -3
  2. main.py +0 -3
load_models.py CHANGED
@@ -3,6 +3,7 @@ import logging
 from auto_gptq import AutoGPTQForCausalLM
 from huggingface_hub import hf_hub_download
 from langchain.llms import LlamaCpp, HuggingFacePipeline
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
 from transformers import (
     AutoModelForCausalLM,
@@ -204,8 +205,6 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging, stre
 
     streamer = TextStreamer(tokenizer, skip_prompt=True)
 
-    logging.info(streamer)
-
     pipe = pipeline(
         "text-generation",
         model=model,
@@ -217,6 +216,7 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging, stre
         repetition_penalty=1.0,
         generation_config=generation_config,
-        streamer=streamer
+        streamer=streamer,
+        callbacks=[StreamingStdOutCallbackHandler()]
     )
 
     local_llm = HuggingFacePipeline(pipeline=pipe)
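For context, a minimal sketch of how these two streaming mechanisms typically fit together; this is not the repo's exact code, and it assumes a langchain 0.0.x API with the tiny "gpt2" checkpoint and the prompt below as stand-ins. transformers' TextStreamer already prints tokens to stdout as they are decoded, while LangChain's StreamingStdOutCallbackHandler only receives tokens through LangChain's own callback system, so it is conventionally attached to the LangChain wrapper rather than to transformers' pipeline():

    # Minimal sketch, not the repo's code. Assumptions: langchain 0.0.x,
    # "gpt2" as a small stand-in for whatever model load_model() resolves.
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
    from langchain.llms import HuggingFacePipeline
    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

    model_id = "gpt2"  # stand-in model id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    # transformers-side streaming: TextStreamer writes each decoded token to stdout.
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=64,
        streamer=streamer,
    )

    # LangChain-side streaming: callbacks hang off the LLM wrapper and only
    # fire when the wrapper emits callback events, not on every decoded token.
    local_llm = HuggingFacePipeline(
        pipeline=pipe,
        callbacks=[StreamingStdOutCallbackHandler()],
    )

    print(local_llm("Explain token streaming in one sentence."))

Under those assumptions, the live stdout output during generation comes from TextStreamer; the callback handler only becomes active once the surrounding code routes token events through LangChain, for example via a streaming-capable LLM wrapper or chain.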
main.py CHANGED
@@ -179,9 +179,6 @@ async def predict(data: Predict):
                 (os.path.basename(str(document.metadata["source"])), str(document.page_content))
             )
 
-
-
-
         return {"response": prompt_response_dict}
     else:
         raise HTTPException(status_code=400, detail="Prompt Incorrect")