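# RAGChatBot / app.py
# NOTE: the lines below were Hugging Face file-viewer page text captured along with the
# source; they are kept here as comments because they are not part of the program:
#   "Engr-Saeed's picture", "Update app.py", "cc744c0 verified", "raw",
#   "history blame", "3.37 kB"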
import pandas as pd
from PyPDF2 import PdfReader
from docx import Document
import os
from groq import Groq
import gradio as gr
# Function to read and process different document types
def read_document(file):
    """Extract plain text from an uploaded document.

    The format is chosen from the file extension (case-insensitive):
    ``.txt`` is decoded as UTF-8, ``.pdf`` is read page by page with
    ``PdfReader``, ``.docx`` paragraphs are joined with newlines, and
    ``.csv`` / ``.xls`` / ``.xlsx`` are loaded with pandas and rendered
    with ``DataFrame.to_string(index=False)``.

    Args:
        file: Either a file-like object exposing ``.name`` (and ``.read()``
            for text files) or a filesystem path string. Which of the two the
            Gradio ``File`` component delivers depends on its version and
            configuration, so both are accepted.

    Returns:
        The extracted text, the literal ``"Unsupported file format"`` for an
        unknown extension, or a string starting with
        ``"Error processing file: "`` if reading raised an exception.
    """
    # Resolve the display name once, outside the try block, so the error
    # handler below can never fail on a missing ``.name`` attribute.
    file_name = getattr(file, "name", None) or str(file)
    try:
        file_extension = os.path.splitext(file_name)[-1].lower()
        print(f"Processing file: {file_name} with extension {file_extension}")
        if file_extension == '.txt':
            if hasattr(file, "read"):
                raw = file.read()
            else:
                # Path-like upload: read the bytes from disk ourselves.
                with open(file_name, "rb") as handle:
                    raw = handle.read()
            # A text-mode stream already yields str; only bytes need decoding.
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw
        elif file_extension == '.pdf':
            reader = PdfReader(file)
            # Guard against pages for which no text could be extracted.
            return ''.join(page.extract_text() or '' for page in reader.pages)
        elif file_extension == '.docx':
            doc = Document(file)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
        elif file_extension in ('.csv', '.xls', '.xlsx'):
            df = pd.read_csv(file) if file_extension == '.csv' else pd.read_excel(file)
            return df.to_string(index=False)
        else:
            return "Unsupported file format"
    except Exception as e:
        # Best-effort boundary: report the failure as text so the UI can show it.
        message = f"Error processing file: {file_name} - {str(e)}"
        print(message)
        return message
# Read the Groq credential from the environment (e.g. a secret configured on the
# Hugging Face Space) and fail fast at import time when it is missing or empty.
api_key = os.environ.get("GROQ_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

# Shared Groq client used for every chat-completion request in this module.
client = Groq(api_key=api_key)
# Function to validate and truncate content to prevent API errors
def validate_content(text, max_length=8000):
    """Strip non-alphanumeric characters from ``text`` and cap its length.

    Every character that is neither alphanumeric nor whitespace is removed
    (this includes all punctuation). If the remaining text is longer than
    ``max_length`` characters it is cut to ``max_length`` and ``"..."`` is
    appended, so a truncated result is ``max_length + 3`` characters long.

    Args:
        text: The raw document text. ``None`` or an empty string yields ``""``.
        max_length: Maximum number of characters kept before the ellipsis.
            Defaults to 8000, the limit previously hard-coded here.

    Returns:
        The filtered and possibly truncated text.
    """
    if not text:
        return ""
    validated_text = ''.join(ch for ch in text if ch.isalnum() or ch.isspace())
    if len(validated_text) > max_length:
        validated_text = validated_text[:max_length] + "..."
    return validated_text
# Function to get an answer from the Groq API
def get_answer(question, model="llama3-8b-8192"):
    """Send ``question`` to the Groq chat-completions API and return the reply.

    Args:
        question: The full prompt text; it is sent as a single ``user`` message.
        model: Identifier of the Groq model to query.

    Returns:
        The message content of the first returned choice, or a string
        starting with ``"Error in API call: "`` if anything raised while
        making the request or reading the result.
    """
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error in Groq API call: {str(e)}")
        response = getattr(e, 'response', None)
        if response is not None:
            # Dumping the response body is diagnostic only. It may be missing
            # ``json()`` or hold a non-JSON payload, and a failure here must
            # not escape the handler and replace the error string below.
            try:
                print(f"Full response: {response.json()}")
            except Exception as detail_error:
                print(f"Could not decode error response: {detail_error}")
        return f"Error in API call: {str(e)}"
# Function to interface with the Gradio UI
def chatbot_interface(documents, question):
    """Answer ``question`` using the text of the uploaded ``documents``.

    Each document is read with ``read_document``, filtered and truncated with
    ``validate_content``, and followed by a blank line. The combined text plus
    the question is then sent to ``get_answer``.

    Args:
        documents: The uploaded files from the Gradio ``File`` input. ``None``
            or an empty list (nothing uploaded) is treated as "no documents",
            so the question is sent without any document context.
        question: The user's question text.

    Returns:
        The string returned by ``get_answer`` (the model reply or an error
        message).
    """
    # With no upload the input value is presumably None rather than an empty
    # list; iterating it directly would raise TypeError.
    uploaded = documents or []
    text = ''.join(
        validate_content(read_document(doc)) + "\n\n" for doc in uploaded
    )
    return get_answer(f"{text}\n\nQuestion: {question}")
# Gradio Interface
# Build the Gradio UI: a title and description, a row holding the inputs
# (document upload + question) and the answer box, and a button that runs
# chatbot_interface on the current inputs and writes the result to the answer box.
# NOTE(review): the original indentation was lost; the nesting below (button
# placed after the row, inside the Blocks context) is a reconstruction — confirm
# against the deployed layout.
with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
    gr.Markdown("# RAG-based Q/A Chatbot with Document Support", elem_id="title")
    gr.Markdown("Upload documents and ask questions related to them.", elem_id="description")
    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            doc_input = gr.File(file_count="multiple", label="Upload Documents")
            question_input = gr.Textbox(label="Ask a Question")
        # Right column: model output.
        with gr.Column():
            output = gr.Textbox(label="Answer")
    submit_button = gr.Button("Get Answer")
    # Wire the button: inputs are passed positionally as (documents, question).
    submit_button.click(chatbot_interface, inputs=[doc_input, question_input], outputs=output)
# Start the web server as soon as the module is executed.
demo.launch()