Spaces:

Engr-Saeed
/

RAGChatBot

Running

App Files Files Community

RAGChatBot / app.py

Engr-Saeed

Update app.py

b240c1f verified about 2 months ago

raw

history blame

No virus

3.38 kB

	import pandas as pd
	from PyPDF2 import PdfReader
	from docx import Document
	import os
	from groq import Groq
	import gradio as gr

	# Function to read and process different document types
	def read_document(file):
	    """Return the text of an uploaded .txt, .pdf, .docx, .csv, .xls or .xlsx file.

	    Args:
	        file: Either a file-like object exposing ``name`` (and ``read()``
	            for text files) or a filesystem path. A path string that
	            carries its own ``name`` attribute is treated as a path too.

	    Returns:
	        The extracted text, the literal "Unsupported file format" for any
	        other extension, or an "Error processing file: ..." message when
	        reading or parsing fails. Failures are reported, never raised.
	    """
	    # Fall back to the object itself when there is no usable ``name`` so a
	    # plain path string works and the error handler below can never fail.
	    file_name = getattr(file, 'name', None) or str(file)
	    try:
	        file_extension = os.path.splitext(file_name)[-1].lower()
	        print(f"Processing file: {file_name} with extension {file_extension}")

	        if file_extension == '.txt':
	            if hasattr(file, 'read'):
	                raw = file.read()
	            else:
	                # Path-style upload: there is no stream to read, open it here.
	                with open(file_name, 'rb') as handle:
	                    raw = handle.read()
	            # A stream opened in text mode already yields str.
	            return raw.decode('utf-8') if isinstance(raw, bytes) else raw
	        if file_extension == '.pdf':
	            reader = PdfReader(file)
	            # A page without extractable text contributes nothing.
	            return ''.join(page.extract_text() or '' for page in reader.pages)
	        if file_extension == '.docx':
	            doc = Document(file)
	            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
	        if file_extension == '.csv':
	            return pd.read_csv(file).to_string(index=False)
	        if file_extension in ('.xls', '.xlsx'):
	            return pd.read_excel(file).to_string(index=False)
	        return "Unsupported file format"
	    except Exception as e:
	        # Best-effort contract: the caller receives the failure as text.
	        print(f"Error processing file: {file_name} - {str(e)}")
	        return f"Error processing file: {file_name} - {str(e)}"

	# Build the Groq client once, taking the key from the GROQ_API_KEY
	# environment variable (e.g. a Space secret) rather than from source code.
	# NOTE: any key that was ever committed in this file must be treated as
	# leaked and revoked.
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

	# Function to validate and truncate content to prevent API errors
	def validate_content(text):
	    """Strip non-alphanumeric, non-whitespace characters and cap the length.

	    Args:
	        text: The raw document text.

	    Returns:
	        The filtered text. When it is longer than 8000 characters it is cut
	        to the first 8000 and "..." is appended.
	    """
	    limit = 8000  # Adjust as needed
	    # Keep letters, digits and whitespace; drop everything else.
	    kept = [ch for ch in text if ch.isspace() or ch.isalnum()]
	    cleaned = ''.join(kept)
	    if len(cleaned) <= limit:
	        return cleaned
	    return cleaned[:limit] + "..."

	# Function to get an answer from the Groq API
	def get_answer(question, model="llama3-8b-8192"):
	    """Send ``question`` to the Groq chat completions API and return the reply.

	    Args:
	        question: The full prompt, sent as a single user message.
	        model: Name of the Groq model to query.

	    Returns:
	        The content of the first returned choice, or an
	        "Error in API call: ..." message when the request fails. Failures
	        are logged to stdout and never raised.
	    """
	    try:
	        chat_completion = client.chat.completions.create(
	            model=model,
	            messages=[{"role": "user", "content": question}],
	        )
	        return chat_completion.choices[0].message.content
	    except Exception as e:
	        print(f"Error in Groq API call: {str(e)}")
	        response = getattr(e, 'response', None)
	        if response is not None:
	            # Logging the response body is diagnostic only; a missing or
	            # non-JSON body must not turn the handler into a new failure.
	            try:
	                print(f"Full response: {response.json()}")
	            except Exception:
	                print(f"Full response: {getattr(response, 'text', response)}")
	        return f"Error in API call: {str(e)}"

	# Function to interface with the Gradio UI
	def chatbot_interface(documents, question):
	    """Answer ``question`` using the text of the uploaded documents as context.

	    Args:
	        documents: Iterable of uploaded files, or None when nothing was
	            uploaded.
	        question: The user's question.

	    Returns:
	        The model's answer, an error message produced by the API helper, or
	        "Please enter a question." when the question is blank.
	    """
	    # Skip the API round trip when there is nothing to ask.
	    if not question or not question.strip():
	        return "Please enter a question."

	    # ``documents or []`` keeps a missing upload from raising on iteration.
	    sections = [
	        validate_content(read_document(doc)) + "\n\n"
	        for doc in documents or []
	    ]
	    text = ''.join(sections)
	    return get_answer(f"{text}\n\nQuestion: {question}")

	# Gradio Interface
	with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
	    # Page heading followed by a one-line usage hint.
	    gr.Markdown(
	        "# RAG-based Q/A Chatbot with Document Support",
	        elem_id="title",
	    )
	    gr.Markdown(
	        "Upload documents and ask questions related to them.",
	        elem_id="description",
	    )

	    with gr.Row():
	        # Left column: the inputs.
	        with gr.Column():
	            doc_input = gr.File(label="Upload Documents", file_count="multiple")
	            question_input = gr.Textbox(label="Ask a Question")

	        # Right column: the answer.
	        with gr.Column():
	            output = gr.Textbox(label="Answer")

	    # Clicking the button runs the question/answer pipeline on the inputs.
	    submit_button = gr.Button("Get Answer")
	    submit_button.click(
	        fn=chatbot_interface,
	        inputs=[doc_input, question_input],
	        outputs=output,
	    )

	demo.launch()