Update helper.py to use correct huggingface inferencing URL
helper.py CHANGED
@@ -1,4 +1,4 @@
-from sentence_transformers import SentenceTransformer
+from sentence_transformers import SentenceTransformer
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from pypdf import PdfReader
 import requests
@@ -35,9 +35,10 @@ def generate_hypothetical_answer(query):
     import os
     import time
 
-    # Hugging Face API endpoint
-    api_url = "https://
+    # Hugging Face API endpoint with vLLM
+    api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
 
+
     # Get API token from environment variable
     api_token = os.getenv("HUGGINGFACE_API_TOKEN")
     if not api_token:
@@ -58,20 +59,21 @@ def generate_hypothetical_answer(query):
     Hypothetical answer:
     """
 
-    # Prepare the request payload
+    # Prepare the request payload for vLLM
     payload = {
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": 256,
             "temperature": 0.7,
             "top_p": 0.95,
-            "do_sample": True
+            "do_sample": True,
+            "use_vllm": True  # Enable vLLM for faster inference
         }
     }
 
    try:
         # Make the API request to Hugging Face
-        print("Sending request to Hugging Face API for hypothetical answer...")
+        print("Sending request to Hugging Face API with vLLM for hypothetical answer...")
         print(f"API URL: {api_url}")
         print(f"Headers: {headers}")
         print(f"Payload: {json.dumps(payload, indent=2)}")
@@ -164,8 +166,8 @@ def query_llm_with_context(query, context, top_n=3):
     Query: {query}
     """
 
-    # Hugging Face API endpoint
-    api_url = "https://
+    # Hugging Face API endpoint with vLLM
+    api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
 
     # Get API token from environment variable
     api_token = os.getenv("HUGGINGFACE_API_TOKEN")
@@ -179,20 +181,21 @@ def query_llm_with_context(query, context, top_n=3):
         "Content-Type": "application/json"
     }
 
-    # Prepare the request payload
+    # Prepare the request payload for vLLM
     payload = {
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": 512,
             "temperature": 0.7,
             "top_p": 0.95,
-            "do_sample": True
+            "do_sample": True,
+            "use_vllm": True  # Enable vLLM for faster inference
        }
     }
 
     try:
         # Make the API request to Hugging Face
-        print("Sending request to Hugging Face API...")
+        print("Sending request to Hugging Face API with vLLM...")
         print(f"API URL: {api_url}")
         print(f"Headers: {headers}")
         print(f"Payload: {json.dumps(payload, indent=2)}")
@@ -239,6 +242,10 @@ def query_llm_with_context(query, context, top_n=3):
         print(f"HTTP error occurred: {e}")
         print(f"Response status code: {e.response.status_code}")
         print(f"Response headers: {e.response.headers}")
+        try:
+            print(f"Response content: {e.response.text}")
+        except:
+            print("Could not print response content")
 
         if e.response.status_code == 401:
             return "Authentication error. Please check your Hugging Face API token."
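
A note on the new endpoint: `/v1/chat/completions` routes on the Hugging Face router follow the OpenAI chat-completions convention, while this diff keeps the older Inference API body shape (`inputs` plus `parameters`). Below is a minimal sketch of what an OpenAI-style request to the same URL could look like; the URL, model ID, and token variable come from the diff, but the payload field names follow the chat-completions convention and are an assumption, not what helper.py currently sends.

```python
import os
import requests

# URL and model come from the diff; the body below follows the OpenAI
# chat-completions convention, which is an assumption -- helper.py itself
# still sends the older {"inputs": ..., "parameters": ...} shape.
api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
api_token = os.getenv("HUGGINGFACE_API_TOKEN")

headers = {
    "Authorization": f"Bearer {api_token}",
    "Content-Type": "application/json",
}
payload = {
    "model": "meta-llama/Llama-2-7b-chat-hf",
    "messages": [{"role": "user", "content": "Write a short hypothetical answer to: what is HyDE?"}],
    "max_tokens": 256,  # chat-completions uses max_tokens rather than max_new_tokens
    "temperature": 0.7,
    "top_p": 0.95,
}

response = requests.post(api_url, headers=headers, json=payload, timeout=60)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```

If the route does expect this shape, the generated text arrives under `choices[0].message.content` rather than as a raw `generated_text` field, so the response-parsing code in helper.py would need a matching change.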
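
On the new error logging in the last hunk: the added bare `except:` also swallows `SystemExit` and `KeyboardInterrupt`. A narrower variant of the same fallback, as a sketch with identical behavior for ordinary failures:

```python
try:
    print(f"Response content: {e.response.text}")
except Exception:
    # Catch Exception instead of a bare except so Ctrl-C and
    # SystemExit still propagate while decode failures are logged.
    print("Could not print response content")
```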