# RAGChatBot / app.py
# Engr-Saeed's picture
# Update app.py
# b240c1f verified
# raw
# history blame
# No virus
# 3.38 kB
import pandas as pd
from PyPDF2 import PdfReader
from docx import Document
import os
from groq import Groq
import gradio as gr
# Function to read and process different document types
def read_document(file):
    """Extract the text content of an uploaded document.

    Supported extensions are ``.txt``, ``.pdf``, ``.docx``, ``.csv``,
    ``.xls`` and ``.xlsx`` (matched case-insensitively).

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing ``name`` (and ``read()`` for ``.txt``
            input).

    Returns:
        The extracted text. An unrecognised extension yields the literal
        string ``"Unsupported file format"``; any failure while reading or
        parsing is reported as an ``"Error processing file: ..."`` string
        instead of being raised.
    """
    # Resolve a display name up front so the error handler below can never
    # fail on a missing ``.name`` attribute (e.g. when given a plain path).
    if isinstance(file, (str, os.PathLike)):
        file_name = os.fspath(file)
    else:
        file_name = getattr(file, "name", "<unknown>")

    try:
        file_extension = os.path.splitext(file_name)[-1].lower()
        print(f"Processing file: {file_name} with extension {file_extension}")

        if file_extension == '.txt':
            if hasattr(file, "read"):
                raw = file.read()
            else:
                with open(file_name, "rb") as handle:
                    raw = handle.read()
            # Text-mode streams already yield ``str``; only bytes need decoding.
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw

        if file_extension == '.pdf':
            reader = PdfReader(file)
            # ``extract_text()`` may yield None/empty for pages without a text
            # layer; treat those as empty instead of failing the whole file.
            return ''.join(page.extract_text() or '' for page in reader.pages)

        if file_extension == '.docx':
            doc = Document(file)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)

        if file_extension in ('.csv', '.xls', '.xlsx'):
            if file_extension == '.csv':
                df = pd.read_csv(file)
            else:
                df = pd.read_excel(file)
            return df.to_string(index=False)

        return "Unsupported file format"
    except Exception as e:
        # Deliberate best-effort: the caller expects a string either way, so
        # report the problem in-band rather than propagating the exception.
        message = f"Error processing file: {file_name} - {str(e)}"
        print(message)
        return message
# Read the Groq API key from the environment; never commit secrets to source.
# SECURITY: a literal key was previously hard-coded on this line. Treat that
# key as compromised and revoke it in the Groq console.
# Set GROQ_API_KEY in the deployment environment instead.
api_key = os.environ.get("GROQ_API_KEY")
# A single client instance; the earlier duplicate `Groq()` call simply
# replaced the first client, so only one construction is needed.
client = Groq(api_key=api_key)
# Function to validate and truncate content to prevent API errors
def validate_content(text, max_length=8000):
    """Sanitise and truncate document text before it is sent to the API.

    Every character that is neither alphanumeric nor whitespace is dropped
    (this includes all punctuation), then the result is capped in length.

    Args:
        text: The raw document text. ``None`` or an empty string yields ``""``.
        max_length: Maximum number of characters to keep. When the cleaned
            text is longer, it is cut to this length and ``"..."`` is appended.

    Returns:
        The cleaned, possibly truncated text.
    """
    if not text:
        return ""

    validated_text = ''.join(ch for ch in text if ch.isalnum() or ch.isspace())

    if len(validated_text) > max_length:
        validated_text = validated_text[:max_length] + "..."
    return validated_text
# Function to get an answer from the Groq API
def get_answer(question, model="llama3-8b-8192"):
    """Send a single-turn prompt to the Groq chat API and return the reply.

    Args:
        question: The full prompt text, sent as one ``user`` message.
        model: Identifier of the Groq-hosted model to query.

    Returns:
        The content of the first completion choice, or an
        ``"Error in API call: ..."`` string if anything goes wrong. This
        function does not raise for ordinary failures.
    """
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error in Groq API call: {str(e)}")
        response = getattr(e, 'response', None)
        if response is not None:
            # Logging the response body is purely diagnostic; it must never
            # turn a handled API error into an unhandled exception (the body
            # may not be JSON at all).
            try:
                print(f"Full response: {response.json()}")
            except Exception:
                print(f"Full response: {getattr(response, 'text', response)}")
        return f"Error in API call: {str(e)}"
# Function to interface with the Gradio UI
def chatbot_interface(documents, question):
    """Answer a question using the text of the uploaded documents as context.

    Args:
        documents: Iterable of uploaded files as delivered by the UI, or
            ``None`` when nothing has been uploaded.
        question: The user's question.

    Returns:
        The model's answer, or an error string produced by ``get_answer``.
    """
    # The file input yields None when no file is selected; treat that as
    # "no documents" instead of failing on iteration.
    sections = [
        validate_content(read_document(doc)) + "\n\n"
        for doc in (documents or [])
    ]
    text = ''.join(sections)
    return get_answer(f"{text}\n\nQuestion: {question}")
# Gradio Interface
# Build the Blocks UI: two Markdown headers, a two-column row (inputs on the
# left, answer on the right) and a button wired to `chatbot_interface`.
# NOTE(review): the source reached review with its indentation stripped; the
# nesting below (button placed under the row, at Blocks level) is a
# reconstruction — confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
    gr.Markdown("# RAG-based Q/A Chatbot with Document Support", elem_id="title")
    gr.Markdown("Upload documents and ask questions related to them.", elem_id="description")
    with gr.Row():
        with gr.Column():
            # Multiple files may be uploaded; they are passed to the callback together.
            doc_input = gr.File(file_count="multiple", label="Upload Documents")
            question_input = gr.Textbox(label="Ask a Question")
        with gr.Column():
            output = gr.Textbox(label="Answer")
    submit_button = gr.Button("Get Answer")
    # Clicking the button sends (documents, question) to chatbot_interface and
    # shows its return value in the answer box.
    submit_button.click(chatbot_interface, inputs=[doc_input, question_input], outputs=output)
# Start the Gradio server when the script is executed.
demo.launch()