import os

import gradio as gr
import pandas as pd
from docx import Document
from groq import Groq
from PyPDF2 import PdfReader


def _file_path(file):
    """Return the filesystem path of an uploaded file.

    The upload may arrive either as a plain path string or as a file-like
    wrapper that exposes its path through ``.name``; both are accepted so
    the readers below can always open the file from disk themselves.
    """
    if isinstance(file, str):
        return file
    return getattr(file, "name", str(file))


# Function to read and process different document types
def read_document(file):
    """Extract the text content of an uploaded document.

    Supported extensions: ``.txt``, ``.pdf``, ``.docx``, ``.csv``, ``.xls``
    and ``.xlsx``.

    Args:
        file: Path string or file-like object with a ``.name`` path.

    Returns:
        The extracted text. For an unknown extension the literal string
        ``"Unsupported file format"`` is returned, and on any failure an
        ``"Error processing file: ..."`` message is returned instead of
        raising, so one bad upload does not abort the whole request.
    """
    # Resolve the path once, outside the try, so the error handler below can
    # never fail on a missing ``.name`` attribute.
    path = _file_path(file)
    try:
        file_extension = os.path.splitext(path)[-1].lower()
        print(f"Processing file: {path} with extension {file_extension}")
        if file_extension == '.txt':
            # Open from disk rather than relying on the upload object having
            # a usable ``read()``; undecodable bytes are replaced, not fatal.
            with open(path, 'rb') as handle:
                return handle.read().decode('utf-8', errors='replace')
        elif file_extension == '.pdf':
            reader = PdfReader(path)
            # Guard against pages that yield no text (e.g. scanned images).
            return ''.join(page.extract_text() or '' for page in reader.pages)
        elif file_extension == '.docx':
            doc = Document(path)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
        elif file_extension in ['.csv', '.xls', '.xlsx']:
            df = pd.read_csv(path) if file_extension == '.csv' else pd.read_excel(path)
            return df.to_string(index=False)
        else:
            return "Unsupported file format"
    except Exception as e:
        # Deliberate best-effort boundary: report the problem as text.
        print(f"Error processing file: {path} - {str(e)}")
        return f"Error processing file: {path} - {str(e)}"


# Retrieve API key from environment variable
api_key = os.getenv("GROQ_API_KEY")  # This assumes you have set the environment variable in Hugging Face Space
if not api_key:
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

client = Groq(api_key=api_key)


# Function to validate and truncate content to prevent API errors
def validate_content(text, max_length=8000):
    """Strip non-alphanumeric characters and cap the length of *text*.

    Args:
        text: Raw document text; ``None`` is treated as empty.
        max_length: Maximum number of characters kept before ``"..."`` is
            appended. Adjust as needed.

    Returns:
        The text reduced to alphanumeric and whitespace characters,
        truncated to ``max_length`` characters plus ``"..."`` when longer.
    """
    validated_text = ''.join(ch for ch in (text or '') if ch.isalnum() or ch.isspace())
    if len(validated_text) > max_length:
        validated_text = validated_text[:max_length] + "..."
    return validated_text


# Function to get an answer from the Groq API
def get_answer(question, model="llama3-8b-8192"):
    """Send *question* to the Groq chat completion API and return the reply.

    Args:
        question: Full prompt text sent as a single user message.
        model: Groq model identifier.

    Returns:
        The content of the first completion choice, or an
        ``"Error in API call: ..."`` message if the request fails.
    """
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error in Groq API call: {str(e)}")
        response = getattr(e, 'response', None)
        if response is not None:
            # The body may not be JSON; never let diagnostics raise from
            # inside the handler and mask the original error.
            try:
                print(f"Full response: {response.json()}")
            except Exception:
                print(f"Full response: {getattr(response, 'text', response)}")
        return f"Error in API call: {str(e)}"


# Function to interface with the Gradio UI
def chatbot_interface(documents, question):
    """Answer *question* using the text of the uploaded *documents*.

    Args:
        documents: Iterable of uploaded files, or ``None`` when nothing was
            uploaded (treated as no documents).
        question: The user's question.

    Returns:
        The model's answer, or an error message string from ``get_answer``.
    """
    text = ''.join(
        validate_content(read_document(doc)) + "\n\n"
        for doc in (documents or [])
    )
    answer = get_answer(f"{text}\n\nQuestion: {question}")
    return answer


# Gradio Interface
with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
    gr.Markdown("# RAG-based Q/A Chatbot with Document Support", elem_id="title")
    gr.Markdown("Upload documents and ask questions related to them.", elem_id="description")

    with gr.Row():
        with gr.Column():
            doc_input = gr.File(file_count="multiple", label="Upload Documents")
            question_input = gr.Textbox(label="Ask a Question")
        with gr.Column():
            output = gr.Textbox(label="Answer")

    submit_button = gr.Button("Get Answer")
    submit_button.click(chatbot_interface, inputs=[doc_input, question_input], outputs=output)

demo.launch()