# Hugging Face Spaces page header captured with the source (Space status: Sleeping)
import pandas as pd | |
from PyPDF2 import PdfReader | |
from docx import Document | |
import os | |
from groq import Groq | |
import gradio as gr | |
# Function to read and process different document types
def read_document(file):
    """Extract the text content of an uploaded document.

    Args:
        file: A file-like object exposing ``name`` (and ``read`` for ``.txt``
            input), or a plain filesystem path string.

    Returns:
        The extracted text on success, ``"Unsupported file format"`` for an
        unrecognised extension, or an ``"Error processing file: ..."``
        message when reading fails. Read errors are reported in the return
        value rather than raised.
    """
    # A path string has no ``name`` attribute; resolve a printable name up
    # front so the error handler below can never fail while reporting.
    file_name = str(getattr(file, "name", None) or file)
    try:
        file_extension = os.path.splitext(file_name)[-1].lower()
        print(f"Processing file: {file_name} with extension {file_extension}")
        if file_extension == '.txt':
            if hasattr(file, "read"):
                raw = file.read()
            else:
                with open(file_name, "rb") as handle:
                    raw = handle.read()
            # Text-mode handles already yield str; only bytes need decoding.
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw
        if file_extension == '.pdf':
            reader = PdfReader(file)
            # Defensive: treat a page with no extractable text as empty
            # instead of failing the whole document.
            return ''.join(page.extract_text() or '' for page in reader.pages)
        if file_extension == '.docx':
            doc = Document(file)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
        if file_extension in ('.csv', '.xls', '.xlsx'):
            if file_extension == '.csv':
                df = pd.read_csv(file)
            else:
                df = pd.read_excel(file)
            return df.to_string(index=False)
        return "Unsupported file format"
    except Exception as e:
        # Best-effort boundary: one unreadable upload must not abort the
        # request, so the failure is logged and returned as text.
        print(f"Error processing file: {file_name} - {str(e)}")
        return f"Error processing file: {file_name} - {str(e)}"
# Retrieve API key from environment variable
# (this assumes the variable has been set in the Hugging Face Space).
api_key = os.environ.get("GROQ_API_KEY")

# Fail fast at start-up: a missing or empty key means no request can succeed.
if api_key is None or api_key == "":
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

# Shared Groq client used for every chat completion request.
client = Groq(api_key=api_key)
# Function to validate and truncate content to prevent API errors
def validate_content(text, max_length=8000):
    """Keep only alphanumeric and whitespace characters, then cap the length.

    Args:
        text: Raw document text. ``None`` or an empty string yields ``""``.
        max_length: Maximum number of characters kept before ``"..."`` is
            appended. Must be non-negative. Defaults to 8000.

    Returns:
        The filtered text, truncated to ``max_length`` characters and
        suffixed with ``"..."`` when it was longer than that.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative")
    if not text:
        return ""
    validated_text = ''.join(ch for ch in text if ch.isalnum() or ch.isspace())
    if len(validated_text) > max_length:
        validated_text = validated_text[:max_length] + "..."
    return validated_text
# Function to get an answer from the Groq API
def get_answer(question, model="llama3-8b-8192"):
    """Send *question* to the Groq chat completion API and return the reply.

    Args:
        question: Full prompt text sent as a single user message.
        model: Model identifier passed to the API.

    Returns:
        The content of the first completion choice, or an
        ``"Error in API call: ..."`` message if the request fails.
    """
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error in Groq API call: {str(e)}")
        response = getattr(e, 'response', None)
        if response is not None:
            # Dumping the response body is diagnostic only; it must not be
            # able to raise and replace the error string returned below.
            try:
                print(f"Full response: {response.json()}")
            except Exception as detail_error:
                print(f"Full response unavailable: {detail_error}")
        return f"Error in API call: {str(e)}"
# Function to interface with the Gradio UI
def chatbot_interface(documents, question):
    """Answer *question* using the text of the uploaded *documents*.

    Args:
        documents: Iterable of uploaded files, or ``None`` when nothing was
            uploaded.
        question: The user's question.

    Returns:
        Whatever ``get_answer`` returns for the combined prompt.
    """
    # NOTE(review): the File input may deliver None when no file is selected;
    # treat that as "no documents" rather than failing on iteration.
    sections = [
        validate_content(read_document(doc)) + "\n\n"
        for doc in (documents or [])
    ]
    text = ''.join(sections)
    return get_answer(f"{text}\n\nQuestion: {question}")
# Gradio Interface
# NOTE(review): the nesting below is reconstructed from statement order because
# the captured source lost its indentation — confirm the intended layout.
with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
    # Page heading and short usage hint.
    gr.Markdown("# RAG-based Q/A Chatbot with Document Support", elem_id="title")
    gr.Markdown("Upload documents and ask questions related to them.", elem_id="description")

    with gr.Row():
        # Input column: uploaded documents plus the question.
        with gr.Column():
            doc_input = gr.File(file_count="multiple", label="Upload Documents")
            question_input = gr.Textbox(label="Ask a Question")
        # Output column: the model's answer.
        with gr.Column():
            output = gr.Textbox(label="Answer")

    submit_button = gr.Button("Get Answer")
    # Wire the button to the handler.
    submit_button.click(
        chatbot_interface,
        inputs=[doc_input, question_input],
        outputs=output,
    )

demo.launch()