# FinQuery / helper.py
# (Hugging Face Space file; last update by avimittal30, commit 8cd371e)
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import requests
import json
import os
import time
def extract_text_from_pdf(pdf_path):
    """Extract plain text from every page of a PDF.

    Parameters
    ----------
    pdf_path : str or file-like
        Path to the PDF (anything ``pypdf.PdfReader`` accepts).

    Returns
    -------
    str
        Page texts joined by newlines, stripped of leading/trailing
        whitespace. Pages with no extractable text are skipped.
    """
    reader = PdfReader(pdf_path)
    page_texts = []
    for page in reader.pages:
        # pypdf's extract_text() returns None for pages with no extractable
        # text (e.g. scanned images); the original `text += None + "\n"`
        # would raise TypeError in that case.
        extracted = page.extract_text()
        if extracted:
            page_texts.append(extracted)
    return "\n".join(page_texts).strip()
def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split *text* into overlapping chunks for embedding.

    The overlap preserves context across chunk boundaries; separators
    are tried in order, so splits prefer paragraph breaks, then line
    breaks, then word boundaries, before falling back to hard cuts.
    """
    preferred_separators = ["\n\n", "\n", " ", ""]
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=preferred_separators,
    )
    chunks = text_splitter.split_text(text)
    return chunks
# Sentence-embedding model, loaded once at import time and reused by
# embedding_function below. NOTE(review): this downloads/loads the model as a
# module-level side effect — confirm that is acceptable for this deployment.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
def embedding_function(texts):
    """Encode *texts* with the module-level SentenceTransformer.

    Returns the embeddings converted from a NumPy array to plain nested
    Python lists of floats.
    """
    vectors = embedding_model.encode(texts, convert_to_numpy=True)
    return vectors.tolist()
def generate_hypothetical_answer(query):
    """Generate a hypothetical answer for *query* (HyDE-style retrieval).

    The hypothetical answer is intended to be embedded and matched against
    document chunks instead of the raw query.

    Parameters
    ----------
    query : str
        The user's question.

    Returns
    -------
    str
        The model's hypothetical answer, or a human-readable error message
        if the request fails. Callers always receive a string; this function
        never raises.
    """
    # Validate the token before doing any other work so failures are cheap.
    api_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not api_token:
        print("Error: HUGGINGFACE_API_TOKEN environment variable not set")
        return "Error: HUGGINGFACE_API_TOKEN environment variable not set"

    # OpenAI-compatible chat-completions route on the HF inference router.
    api_url = (
        "https://router.huggingface.co/hf-inference/models/"
        "meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
    )
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }

    prompt = f"""
Given the following query, generate a hypothetical answer that might be found in a document:
Query: {query}
Hypothetical answer:
"""

    # The /v1/chat/completions route expects the OpenAI chat schema
    # ("messages"/"max_tokens"), not the text-generation
    # "inputs"/"parameters" schema the original code sent.
    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.95,
    }

    try:
        print("Sending request to Hugging Face API for hypothetical answer...")
        print(f"API URL: {api_url}")
        # Do NOT log `headers`: it contains the bearer token.
        start_time = time.time()
        # Generous timeout — cold model starts can take minutes.
        response = requests.post(api_url, headers=headers, json=payload, timeout=300)
        elapsed = time.time() - start_time
        print(f"Received hypothetical answer from Hugging Face API in {elapsed:.2f} seconds")
        print(f"Response status code: {response.status_code}")
        print(f"Response content: {response.text[:1000]}...")  # first 1000 chars, for debugging
        response.raise_for_status()  # Raise an exception for HTTP errors

        result = response.json()
        # Chat-completions responses carry the text in choices[0].message.content.
        if isinstance(result, dict) and result.get("choices"):
            message = result["choices"][0].get("message", {})
            return message.get("content", "").strip()
        # Fallback for text-generation style responses.
        if isinstance(result, list) and result:
            return result[0].get("generated_text", "").strip()
        if isinstance(result, dict):
            return result.get("generated_text", "").strip()
        return ""
    except requests.exceptions.Timeout:
        print("Request to Hugging Face API timed out after 5 minutes")
        return "The request timed out. The model is taking too long to respond. Please try again with a simpler query."
    except requests.exceptions.ConnectionError:
        print("Could not connect to Hugging Face API")
        return "Could not connect to the Hugging Face API. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
        if e.response.status_code == 401:
            return "Authentication error. Please check your Hugging Face API token."
        elif e.response.status_code == 429:
            return "Rate limit exceeded. Please try again later."
        return f"HTTP error occurred: {e}"
    except Exception as e:
        # Last-resort guard so callers always get a string back.
        print(f"Error generating hypothetical answer: {e}")
        import traceback
        print(f"Traceback: {traceback.format_exc()}")
        return "Failed to generate a hypothetical answer."
def query_llm_with_context(query, context, top_n=3):
    """Answer *query* grounded in retrieved *context* via the HF chat API.

    Parameters
    ----------
    query : str
        The user's question.
    context : tuple
        ``(documents, similarity_scores)`` as returned by the retriever;
        only the documents are used here.
    top_n : int, optional
        Number of top-ranked documents to include in the prompt.

    Returns
    -------
    str
        The model's answer, or a human-readable error message if the
        request fails. Callers always receive a string; never raises.
    """
    # Validate the token before doing any other work so failures are cheap.
    api_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not api_token:
        print("Error: HUGGINGFACE_API_TOKEN environment variable not set")
        return "Error: HUGGINGFACE_API_TOKEN environment variable not set"

    # Unpack the retriever output; scores are currently unused but kept in
    # the unpacking to enforce the (documents, scores) tuple contract.
    documents, similarity_scores = context
    top_docs = documents[:top_n]
    context_text = "\n\n===Document Boundary===\n\n".join(top_docs)

    prompt = f"""
Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the following query:
Query: {query}
"""

    # OpenAI-compatible chat-completions route on the HF inference router.
    api_url = (
        "https://router.huggingface.co/hf-inference/models/"
        "meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
    )
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    # The /v1/chat/completions route expects the OpenAI chat schema
    # ("messages"/"max_tokens"), not the text-generation
    # "inputs"/"parameters" schema the original code sent.
    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
    }

    try:
        print("Sending request to Hugging Face API...")
        print(f"API URL: {api_url}")
        # Do NOT log `headers`: it contains the bearer token.
        start_time = time.time()
        # Generous timeout — cold model starts can take minutes.
        response = requests.post(api_url, headers=headers, json=payload, timeout=300)
        elapsed = time.time() - start_time
        print(f"Received response from Hugging Face API in {elapsed:.2f} seconds")
        print(f"Response status code: {response.status_code}")
        print(f"Response content: {response.text[:1000]}...")  # first 1000 chars, for debugging
        response.raise_for_status()  # Raise an exception for HTTP errors

        result = response.json()
        # Chat-completions responses carry the text in choices[0].message.content.
        if isinstance(result, dict) and result.get("choices"):
            message = result["choices"][0].get("message", {})
            return message.get("content", "").strip()
        # Fallback for text-generation style responses.
        if isinstance(result, list) and result:
            return result[0].get("generated_text", "").strip()
        if isinstance(result, dict):
            return result.get("generated_text", "").strip()
        return ""
    except requests.exceptions.Timeout:
        print("Request to Hugging Face API timed out after 5 minutes")
        return "The request timed out. The model is taking too long to respond. Please try again with a simpler query or fewer context documents."
    except requests.exceptions.ConnectionError:
        print("Could not connect to Hugging Face API")
        return "Could not connect to the Hugging Face API. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
        if e.response.status_code == 401:
            return "Authentication error. Please check your Hugging Face API token."
        elif e.response.status_code == 429:
            return "Rate limit exceeded. Please try again later."
        return f"HTTP error occurred: {e}"
    except Exception as e:
        # Last-resort guard so callers always get a string back.
        print(f"Error querying LLM with context: {e}")
        import traceback
        print(f"Traceback: {traceback.format_exc()}")
        return "Failed to generate an answer with the provided context."