ppsingh committed
Commit
06bf5a5
1 Parent(s): 5620e1f

Create reader.py

Files changed (1)
  1. auditqa/reader.py +39 -0
auditqa/reader.py ADDED
@@ -0,0 +1,39 @@
+from huggingface_hub import InferenceClient
+from auditqa.process_chunks import getconfig
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.chat_models.huggingface import ChatHuggingFace
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+model_config = getconfig("model_params.cfg")
+NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
+HF_token = os.environ["LLAMA_3_1"]
+
+
+def nvidia_client():
+    client = InferenceClient(
+        base_url=model_config.get('reader', 'NVIDIA_ENDPOINT'),
+        api_key=NVIDIA_SERVER)
+
+    return client
+
+def dedicated_endpoint():
+    # Set up the streaming callback handler
+    callback = StreamingStdOutCallbackHandler()
+
+    # Initialize the HuggingFaceEndpoint with streaming enabled
+    llm_qa = HuggingFaceEndpoint(
+        endpoint_url=model_config.get('reader', 'DEDICATED_ENDPOINT'),
+        max_new_tokens=int(model_config.get('reader', 'MAX_TOKENS')),
+        repetition_penalty=1.03,
+        timeout=70,
+        huggingfacehub_api_token=HF_token,
+        streaming=True,  # Enable streaming for real-time token generation
+        callbacks=[callback]  # Add the streaming callback handler
+    )
+
+    # Create a ChatHuggingFace instance with the streaming-enabled endpoint
+    chat_model = ChatHuggingFace(llm=llm_qa)
+    return chat_model
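
Note: a minimal usage sketch of the two helpers, not part of this commit. It assumes model_params.cfg carries a [reader] section with NVIDIA_ENDPOINT, DEDICATED_ENDPOINT, and MAX_TOKENS keys, that NVIDIA_SERVERLESS and LLAMA_3_1 are set in the environment or a .env file, and that the prompt text below is a placeholder.

    from auditqa.reader import nvidia_client, dedicated_endpoint

    # Serverless path: InferenceClient exposes an OpenAI-style chat-completion API.
    client = nvidia_client()
    response = client.chat_completion(
        messages=[{"role": "user", "content": "Summarise the key audit findings."}],  # placeholder prompt
        max_tokens=256,
    )
    print(response.choices[0].message.content)

    # Dedicated path: ChatHuggingFace is a LangChain chat model. Because the
    # endpoint was built with streaming=True and a StreamingStdOutCallbackHandler,
    # tokens are also printed to stdout as they are generated.
    chat_model = dedicated_endpoint()
    answer = chat_model.invoke("Summarise the key audit findings.")  # placeholder prompt
    print(answer.content)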