# RAGChatBot / app.py
# Engr-Saeed's picture
# Update app.py
# cc744c0 verified
import pandas as pd
from PyPDF2 import PdfReader
from docx import Document
import os
from groq import Groq
import gradio as gr
# Function to read and process different document types
def read_document(file):
    """Extract the text content of an uploaded document.

    Supported extensions are ``.txt``, ``.pdf``, ``.docx``, ``.csv``,
    ``.xls`` and ``.xlsx``; the extension is matched case-insensitively.

    Args:
        file: Either a file-like object exposing ``name`` (and ``read()``
            for text files) or a filesystem path. Path-style values, such
            as a plain ``str`` or a ``str`` subclass carrying ``name``, are
            opened from disk when they have no ``read()`` method.

    Returns:
        The extracted text, the literal ``"Unsupported file format"`` for
        any other extension, or an ``"Error processing file: ..."`` message
        when reading or parsing fails. Read/parse failures are reported in
        the return value rather than raised.
    """
    # Resolve the display name before entering the try block so the error
    # handler can report it without touching ``file.name`` again (which
    # would raise a second time for inputs that lack the attribute).
    name_attr = getattr(file, 'name', None)
    file_name = str(name_attr) if name_attr is not None else str(file)
    try:
        file_extension = os.path.splitext(file_name)[-1].lower()
        print(f"Processing file: {file_name} with extension {file_extension}")
        if file_extension == '.txt':
            if hasattr(file, 'read'):
                raw = file.read()
            else:
                # Path-style input: read the bytes from disk ourselves.
                path = file if isinstance(file, (str, os.PathLike)) else file_name
                with open(path, 'rb') as handle:
                    raw = handle.read()
            # Handles opened in text mode already yield ``str``.
            if isinstance(raw, (bytes, bytearray)):
                return raw.decode('utf-8')
            return raw
        if file_extension == '.pdf':
            reader = PdfReader(file)
            # ``or ''`` guards against pages for which no text is returned.
            return ''.join(page.extract_text() or '' for page in reader.pages)
        if file_extension == '.docx':
            doc = Document(file)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
        if file_extension in ('.csv', '.xls', '.xlsx'):
            df = pd.read_csv(file) if file_extension == '.csv' else pd.read_excel(file)
            return df.to_string(index=False)
        return "Unsupported file format"
    except Exception as e:
        # Best-effort boundary: the UI shows the message instead of crashing.
        print(f"Error processing file: {file_name} - {str(e)}")
        return f"Error processing file: {file_name} - {str(e)}"
# The Groq API key comes from the environment (for example, a secret configured
# on the Hugging Face Space); fail fast at startup when it is missing or empty.
api_key = os.environ.get("GROQ_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")
# Shared client used by get_answer for every request.
client = Groq(api_key=api_key)
# Function to validate and truncate content to prevent API errors
def validate_content(text, max_length=8000):
    """Strip non-alphanumeric characters and cap the length of ``text``.

    Args:
        text: Raw document text. ``None`` (or any empty value) is treated
            as empty text.
        max_length: Maximum number of cleaned characters kept before the
            ``"..."`` truncation marker is appended. Defaults to 8000.

    Returns:
        The cleaned text. Only characters for which ``str.isalnum()`` or
        ``str.isspace()`` is true are kept, so punctuation is removed. When
        the cleaned text is longer than ``max_length`` it is cut to
        ``max_length`` characters and ``"..."`` is appended.
    """
    if not text:
        return ''
    validated_text = ''.join(ch for ch in text if ch.isalnum() or ch.isspace())
    if len(validated_text) > max_length:
        # The marker is added on top of the limit, matching the original.
        validated_text = validated_text[:max_length] + "..."
    return validated_text
# Function to get an answer from the Groq API
def get_answer(question, model="llama3-8b-8192"):
    """Send ``question`` to the Groq chat-completions API and return the reply.

    Args:
        question: Full prompt text, sent as a single ``user`` message.
        model: Groq model identifier to query.

    Returns:
        The content of the first choice's message, or an
        ``"Error in API call: ..."`` string if the request failed.
    """
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error in Groq API call: {str(e)}")
        response = getattr(e, 'response', None)
        if response is not None:
            # Dumping the response body is diagnostics only: a missing or
            # non-JSON body must not raise out of this handler and mask the
            # error string returned below.
            try:
                print(f"Full response: {response.json()}")
            except Exception as parse_error:
                print(f"Full response unavailable: {parse_error}")
        return f"Error in API call: {str(e)}"
# Function to interface with the Gradio UI
def chatbot_interface(documents, question):
    """Answer ``question`` using the text of the uploaded ``documents``.

    Each document is read with ``read_document``, cleaned and truncated with
    ``validate_content``, and the pieces are concatenated (each followed by a
    blank line) ahead of the question in the prompt sent to ``get_answer``.

    Args:
        documents: Iterable of uploaded files, or ``None`` when nothing was
            uploaded (handled the same as an empty list).
        question: The user's question.

    Returns:
        Whatever ``get_answer`` returns for the assembled prompt.
    """
    # ``or []`` avoids a TypeError when the callback receives None.
    text = ''.join(
        validate_content(read_document(doc)) + "\n\n"
        for doc in (documents or [])
    )
    return get_answer(f"{text}\n\nQuestion: {question}")
# Gradio Interface
# Declares the Blocks UI: a title and a description, layout contexts (a row
# and two columns) holding the document upload, the question box and the
# answer box, and a button that runs chatbot_interface.
# NOTE(review): how the Row/Column contexts and the button nest depends on the
# file's indentation — confirm the layout renders as intended.
with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
gr.Markdown("# RAG-based Q/A Chatbot with Document Support", elem_id="title")
gr.Markdown("Upload documents and ask questions related to them.", elem_id="description")
with gr.Row():
with gr.Column():
# file_count="multiple" lets the user upload several documents at once.
doc_input = gr.File(file_count="multiple", label="Upload Documents")
question_input = gr.Textbox(label="Ask a Question")
with gr.Column():
output = gr.Textbox(label="Answer")
submit_button = gr.Button("Get Answer")
# Clicking the button calls chatbot_interface with the uploaded files and the
# question, and writes its return value into the "Answer" textbox.
submit_button.click(chatbot_interface, inputs=[doc_input, question_input], outputs=output)
# Start the Gradio app.
demo.launch()