from flask import Flask, request, jsonify, render_template
import fitz  # PyMuPDF for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
import os
from io import BytesIO
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
from typing import Iterator, List, Tuple
app = Flask(__name__, template_folder=os.getcwd())
# Default settings
class ChatConfig:
    MODEL = "meta-llama/Llama-3.3-70B-Instruct"  # Change back to Gemma
    DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMP = 0.3
    DEFAULT_TOP_P = 0.95
# Get the token from environment variable
HF_TOKEN = os.getenv('HF_TOKEN')
client = InferenceClient(
    ChatConfig.MODEL,
    token=HF_TOKEN
)
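# Note: HF_TOKEN must be available in the environment (on Hugging Face Spaces,
# add it as a repository secret); without it, gated models such as the Llama
# checkpoint above will be rejected by the Inference API.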
embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
vector_dim = 384 # Embedding size
index = faiss.IndexFlatL2(vector_dim) # FAISS index
documents = [] # Store extracted text
def extract_text_from_pdf(pdf_stream):
"""Extracts text from PDF stream"""
doc = fitz.open(stream=pdf_stream, filetype="pdf")
text_chunks = [page.get_text("text") for page in doc]
doc.close()
return text_chunks
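# A minimal sanity check for the extractor ("sample.pdf" is a hypothetical file):
#
#   with open("sample.pdf", "rb") as f:
#       chunks = extract_text_from_pdf(f.read())
#   print(f"{len(chunks)} pages extracted")
#
# fitz.open(stream=...) accepts raw bytes as well as a BytesIO object, so this
# snippet and the upload route below can share the same helper.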
def create_vector_db(text_chunks):
"""Embeds text chunks and adds them to FAISS index"""
global documents, index
# Reinitialize the FAISS index
index = faiss.IndexFlatL2(vector_dim)
documents = text_chunks
embeddings = embed_model.encode(text_chunks)
# Convert embeddings to np.float32 for FAISS
embeddings = np.array(embeddings, dtype=np.float32)
# Ensure that embeddings have the correct shape (should be 2D, with each vector having the right dimension)
if embeddings.ndim == 1: # If only one embedding, reshape it
embeddings = embeddings.reshape(1, -1)
# Add embeddings to the FAISS index
index.add(embeddings)
# Check if adding was successful (optional)
if index.ntotal == 0:
print("Error: FAISS index is empty after adding embeddings.")
def search_relevant_text(query):
    """Finds the most relevant text chunks for the given query"""
    query_embedding = embed_model.encode([query])
    k = min(3, index.ntotal)  # Never request more neighbors than indexed vectors
    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k)
    return "\n".join(documents[i] for i in closest_idx[0] if i != -1)
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str = ChatConfig.DEFAULT_SYSTEM_MSG,
    max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
    temperature: float = ChatConfig.DEFAULT_TEMP,
    top_p: float = ChatConfig.DEFAULT_TOP_P
) -> Iterator[str]:
    """Streams an answer grounded in the retrieved PDF context"""
    if not documents:
        yield "Please upload a PDF first."
        return
    context = search_relevant_text(message)  # Get relevant content from the PDF
    # Replay the conversation history first, preserving the alternating
    # user/assistant pattern and skipping empty turns
    messages = []
    for user_msg, bot_msg in history:
        if user_msg.strip():
            messages.append({"role": "user", "content": user_msg})
        if bot_msg.strip():
            messages.append({"role": "assistant", "content": bot_msg})
    # Fold the system prompt and retrieved context into the final user turn
    messages.append({
        "role": "user",
        "content": f"{system_message}\n\nContext: {context}\nQuestion: {message}",
    })
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Yield each new token so callers can stream or join the pieces
            yield chunk.choices[0].delta.content or ""
    except Exception as e:
        print(f"Error generating response: {str(e)}")
        yield "I apologize, but I encountered an error while generating the response."
@app.route('/')
def home():  # Named home() so the view does not shadow the FAISS `index` global
    """Serve the HTML page for the user interface"""
    return render_template('index.html')
@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """Handle PDF upload"""
    if 'pdf' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['pdf']
    if file.filename == "":
        return jsonify({"error": "No selected file"}), 400
    try:
        # Read the file into memory instead of saving it to disk
        pdf_stream = BytesIO(file.read())
        # Extract per-page text and (re)build the vector database
        text_chunks = extract_text_from_pdf(pdf_stream)
        create_vector_db(text_chunks)
        return jsonify({"message": "PDF uploaded and indexed successfully!"}), 200
    except Exception as e:
        return jsonify({"error": f"Error processing file: {str(e)}"}), 500
@app.route('/ask_question', methods=['POST'])
def ask_question():
    """Handle user question"""
    message = request.json.get('message', '')
    history = request.json.get('history', [])
    response = generate_response(message, history)
    return jsonify({"response": "".join(response)})  # Drain the token stream into one string
if __name__ == '__main__':
    app.run(debug=True)
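# Example requests against a local dev run (Flask's default bind is
# http://127.0.0.1:5000; "sample.pdf" is a placeholder file name):
#
#   curl -F "pdf=@sample.pdf" http://127.0.0.1:5000/upload_pdf
#   curl -H "Content-Type: application/json" \
#        -d '{"message": "Summarize the document", "history": []}' \
#        http://127.0.0.1:5000/ask_question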