from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import requests
import json
import os
import time
import traceback

def extract_text_from_pdf(pdf_path):
    """Extract plain text from every page of a PDF."""
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages, so fall back to ""
        text += (page.extract_text() or "") + "\n"
    return text.strip()

def chunk_text(text, chunk_size=500, chunk_overlap=100):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,  # Overlap to preserve context
        separators=["\n\n", "\n", " ", ""],  # Prioritize logical breaks
    )
    return splitter.split_text(text)

embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embedding_function(texts):
    return embedding_model.encode(texts, convert_to_numpy=True).tolist()
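
# Quick sanity check (an illustrative sketch, not part of the original app
# flow): all-MiniLM-L6-v2 produces one 384-dimensional vector per input text,
# so every downstream similarity comparison happens in a 384-dim space.
def embedding_sanity_check():
    vectors = embedding_function(["hello world", "a second sentence"])
    assert len(vectors) == 2
    assert len(vectors[0]) == 384, "unexpected embedding dimension"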

def generate_hypothetical_answer(query):
    """Generate a hypothetical answer to the query (the HyDE retrieval trick)."""
    # OpenAI-compatible chat completions endpoint on the Hugging Face inference router
    api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
    # Get API token from environment variable
    api_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not api_token:
        print("Error: HUGGINGFACE_API_TOKEN environment variable not set")
        return "Error: HUGGINGFACE_API_TOKEN environment variable not set"
    # Headers for the API request
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    # Create a prompt for generating a hypothetical answer
    prompt = f"""
Given the following query, generate a hypothetical answer that might be found in a document:
Query: {query}
Hypothetical answer:
"""
    # Request payload in the chat completions format this endpoint expects
    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.95,
    }
    try:
        # Make the API request to Hugging Face
        print("Sending request to Hugging Face API for hypothetical answer...")
        print(f"API URL: {api_url}")
        # Note: `headers` is deliberately not printed; it contains the bearer token
        print(f"Payload: {json.dumps(payload, indent=2)}")
        start_time = time.time()
        # Set a longer timeout (5 minutes) to allow a cold model to respond
        response = requests.post(api_url, headers=headers, json=payload, timeout=300)
        end_time = time.time()
        print(f"Received hypothetical answer from Hugging Face API in {end_time - start_time:.2f} seconds")
        print(f"Response status code: {response.status_code}")
        print(f"Response content: {response.text[:1000]}...")  # First 1000 chars for debugging
        response.raise_for_status()  # Raise an exception for HTTP errors
        # Parse the chat completions response and extract the generated text
        result = response.json()
        choices = result.get("choices", [])
        generated_text = choices[0]["message"]["content"] if choices else ""
        return generated_text.strip()
    except requests.exceptions.Timeout:
        print("Request to Hugging Face API timed out after 5 minutes")
        return "The request timed out. The model is taking too long to respond. Please try again with a simpler query."
    except requests.exceptions.ConnectionError:
        print("Could not connect to Hugging Face API")
        return "Could not connect to the Hugging Face API. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
        if e.response.status_code == 401:
            return "Authentication error. Please check your Hugging Face API token."
        elif e.response.status_code == 429:
            return "Rate limit exceeded. Please try again later."
        return f"HTTP error occurred: {e}"
    except Exception as e:
        print(f"Error generating hypothetical answer: {e}")
        print(f"Traceback: {traceback.format_exc()}")
        return "Failed to generate a hypothetical answer."

def query_llm_with_context(query, context, top_n=3):
    """Answer the query with the LLM, grounded in the retrieved documents."""
    # Unpack the (documents, similarity_scores) context tuple
    documents, similarity_scores = context
    # Use only the top N documents
    top_docs = documents[:top_n]
    # Create a context string by joining the top documents
    context_text = "\n\n===Document Boundary===\n\n".join(top_docs)
    # Create a prompt with the context and query
    prompt = f"""
Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the following query:
Query: {query}
"""
    # OpenAI-compatible chat completions endpoint on the Hugging Face inference router
    api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"
    # Get API token from environment variable
    api_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not api_token:
        print("Error: HUGGINGFACE_API_TOKEN environment variable not set")
        return "Error: HUGGINGFACE_API_TOKEN environment variable not set"
    # Headers for the API request
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    # Request payload in the chat completions format this endpoint expects
    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
    }
    try:
        # Make the API request to Hugging Face
        print("Sending request to Hugging Face API...")
        print(f"API URL: {api_url}")
        # Note: `headers` is deliberately not printed; it contains the bearer token
        print(f"Payload: {json.dumps(payload, indent=2)}")
        start_time = time.time()
        # Set a longer timeout (5 minutes) to allow a cold model to respond
        response = requests.post(api_url, headers=headers, json=payload, timeout=300)
        end_time = time.time()
        print(f"Received response from Hugging Face API in {end_time - start_time:.2f} seconds")
        print(f"Response status code: {response.status_code}")
        print(f"Response content: {response.text[:1000]}...")  # First 1000 chars for debugging
        response.raise_for_status()  # Raise an exception for HTTP errors
        # Parse the chat completions response and extract the generated text
        result = response.json()
        choices = result.get("choices", [])
        generated_text = choices[0]["message"]["content"] if choices else ""
        return generated_text.strip()
    except requests.exceptions.Timeout:
        print("Request to Hugging Face API timed out after 5 minutes")
        return "The request timed out. The model is taking too long to respond. Please try again with a simpler query or fewer context documents."
    except requests.exceptions.ConnectionError:
        print("Could not connect to Hugging Face API")
        return "Could not connect to the Hugging Face API. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
        if e.response.status_code == 401:
            return "Authentication error. Please check your Hugging Face API token."
        elif e.response.status_code == 429:
            return "Rate limit exceeded. Please try again later."
        return f"HTTP error occurred: {e}"
    except Exception as e:
        print(f"Error querying LLM with context: {e}")
        print(f"Traceback: {traceback.format_exc()}")
        return "Failed to generate an answer with the provided context."