Spaces:

SurabhiT
/

DocuSmart

Running

App Files Files Community

DocuSmart / app.py

SurabhiT

Update app.py

a1ef925 verified about 1 year ago

raw

history blame contribute delete

9.79 kB

	import os
	import tkinter as tk
	from tkinter import filedialog
	from crewai import Agent, Task, Crew
	from langchain_openai import ChatOpenAI
	from langchain_community.llms import Ollama
	from langchain.agents.agent_types import AgentType
	from langchain_experimental.agents.agent_toolkits import create_csv_agent
	from langchain_openai import ChatOpenAI, OpenAI
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain_groq import ChatGroq
	from crewai.process import Process
	import gradio as gr
	import numpy as np

	# Read the Gemini API key from the environment once; it is passed
	# explicitly to the chat model built further down in this file.
	api_key = os.getenv("GOOGLE_API_KEY")

	if api_key is None:
	    # os.environ only accepts str values, so writing None back would raise
	    # an opaque TypeError at import time. Report the real problem instead.
	    print("WARNING: GOOGLE_API_KEY is not set; Gemini requests will fail.")
	else:
	    # Write the key back so the variable is guaranteed to be present in
	    # this process' environment (presumably for libraries that read it
	    # directly rather than receiving it as an argument -- TODO confirm).
	    os.environ["GOOGLE_API_KEY"] = api_key


	from crewai_tools import PDFSearchTool
	from crewai_tools import FileReadTool
	from crewai_tools import DOCXSearchTool
	from crewai_tools import TXTSearchTool
	from crewai_tools import CSVSearchTool



	# Shared Gemini chat model: every agent factory in this file passes this
	# same instance as its ``llm``.
	llm = ChatGoogleGenerativeAI(
	    google_api_key=api_key,
	    model="gemini-1.5-flash-latest",
	    temperature=0.6,
	    verbose=True,
	)


	#--------------------------------------------Class for choosing agent---------------------------------------#
	class agentCollection:
	    """Factories that build the agent suited to a given file type.

	    All factories are static methods and are called on the class itself,
	    e.g. ``agentCollection.agentPDF(path)``.
	    """

	    @staticmethod
	    def _document_agent(role, goal, backstory, tool, **extra):
	        """Build a verbose, non-delegating agent that owns exactly one tool.

	        ``extra`` is forwarded to ``Agent`` unchanged (e.g. ``max_iter``).
	        """
	        return Agent(
	            role=role,
	            goal=goal,
	            backstory=backstory,
	            verbose=True,
	            tools=[tool],
	            llm=llm,
	            allow_delegation=False,
	            **extra,
	        )

	    @staticmethod
	    def agentPDF(filepath):
	        """Return an agent that searches the PDF at ``filepath``."""
	        return agentCollection._document_agent(
	            role="PDF Content Searcher and Writer",
	            goal="Generate a detailed description of relevant content from a PDF provided by the user",
	            backstory="You are an expert in navigating and extracting detailed information from PDF documents. Your task is to find the most relevant and accurate content within the PDF and provide a detailed description that addresses the user's query.",
	            tool=toolsCollection.toolPDF(filepath),
	            # The keyword is lower-case ``max_iter``; the previous spelling
	            # ``max_Iter`` did not match the Agent field name.
	            max_iter=6,
	        )

	    @staticmethod
	    def agentFile(filepath):
	        """Return an agent that reads the file at ``filepath`` as-is."""
	        return agentCollection._document_agent(
	            role="General File Content Searcher and Writer",
	            goal="Generate a detailed description of relevant content from various file formats provided by the user",
	            backstory="You have extensive experience in handling different types of files, including PDFs, DOCX, TXT, and CSV. Your role is to expertly extract and describe the most pertinent information from any file format to meet the user's needs.",
	            tool=toolsCollection.toolFile(filepath),
	        )

	    @staticmethod
	    def agentTXT(filepath):
	        """Return an agent that searches the text file at ``filepath``."""
	        return agentCollection._document_agent(
	            role="Text File Content Searcher and Writer",
	            goal="Generate a detailed description of relevant content from text files provided by the user",
	            backstory="You specialize in working with plain text files. Your job is to sift through the text and identify the most relevant information, providing a detailed description that fulfills the user's query.",
	            tool=toolsCollection.toolTXT(filepath),
	        )

	    @staticmethod
	    def agentDOCX(filepath):
	        """Return an agent that searches the DOCX file at ``filepath``."""
	        return agentCollection._document_agent(
	            role="DOCX Content Searcher and Writer",
	            goal="Generate a detailed description of relevant content from DOCX files provided by the user",
	            backstory="You are proficient in reading and extracting detailed information from DOCX documents. Your expertise allows you to locate and describe the most relevant content within a DOCX file, ensuring the user's query is answered thoroughly and accurately.",
	            tool=toolsCollection.toolDOCX(filepath),
	        )

	    @staticmethod
	    def agentCSV(filepath):
	        """Return a LangChain CSV agent (not a CrewAI ``Agent``) for ``filepath``.

	        The result comes from ``create_csv_agent``, so callers query it
	        directly instead of wrapping it in a crew.
	        """
	        # NOTE(review): newer langchain_experimental releases reportedly
	        # require opting in with allow_dangerous_code=True -- confirm
	        # against the version this app is pinned to.
	        return create_csv_agent(
	            llm,
	            filepath,
	            verbose=True,
	            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
	        )

	    @staticmethod
	    def agentContentWriter():
	        """Return the tool-less agent that rewrites findings in the requested format."""
	        return Agent(
	            role="Content Writer",
	            goal="Summarize the data received from other agents into a comprehensive report or blog",
	            backstory="""You are a skilled content writer with expertise in synthesizing information from various sources. Your task is to use the detailed descriptions provided by other agents to create a well-structured and coherent summary that addresses the user's query in detail.""",
	            verbose=True,
	            llm=llm,
	            # Lower-case ``max_iter`` matches the Agent field name (was
	            # ``max_Iter``).
	            max_iter=10,
	        )

	#--------------------------------------------Class for choosing tool---------------------------------------#

	class toolsCollection:
	    """Factories for the crewai_tools instance matching each file type.

	    All factories are static methods and are called on the class itself,
	    e.g. ``toolsCollection.toolPDF(path)``.
	    """

	    @staticmethod
	    def _rag_config():
	        """Return a fresh copy of the config shared by every tool here.

	        A new dict is built on each call so no two tools share (and can
	        mutate) the same nested dictionaries.
	        """
	        return dict(
	            llm=dict(
	                provider="google",
	                config=dict(
	                    model="gemini-1.5-flash-latest",
	                ),
	            ),
	            embedder=dict(
	                provider="huggingface",
	                config=dict(
	                    model="sentence-transformers/msmarco-distilbert-base-v4"
	                ),
	            ),
	        )

	    @staticmethod
	    def toolPDF(filepath):
	        """Return a ``PDFSearchTool`` for ``filepath``, or ``None`` if it is empty."""
	        if filepath == "":
	            print("FILE NOT FOUND")
	            return None
	        return PDFSearchTool(config=toolsCollection._rag_config(), pdf=filepath)

	    @staticmethod
	    def toolFile(filepath):
	        """Return a ``FileReadTool`` bound to ``filepath``."""
	        return FileReadTool(
	            config=toolsCollection._rag_config(),
	            file_path=filepath,
	        )

	    @staticmethod
	    def toolTXT(filepath):
	        """Return a ``TXTSearchTool`` bound to ``filepath``."""
	        return TXTSearchTool(config=toolsCollection._rag_config(), txt=filepath)

	    @staticmethod
	    def toolDOCX(filepath):
	        """Return a ``DOCXSearchTool`` for ``filepath``, or ``None`` if it is empty."""
	        if filepath == "":
	            print("FILE NOT FOUND")
	            return None
	        return DOCXSearchTool(config=toolsCollection._rag_config(), docx=filepath)

	    @staticmethod
	    def toolCSV(filepath):
	        """Return a ``CSVSearchTool`` bound to ``filepath``."""
	        return CSVSearchTool(config=toolsCollection._rag_config(), csv=filepath)

	def run_ai(file, query, required_ans_format):
	    """Answer ``query`` about an uploaded file.

	    Args:
	        file: The upload, either a path string or an object exposing the
	            path as ``.name``.
	        query: The user's question about the file.
	        required_ans_format: Description of the desired answer format; used
	            as the content writer task's expected output.

	    Returns:
	        For CSV files, whatever the CSV agent's ``run(query)`` returns.
	        For every other supported type, the result of ``crew.kickoff()``.

	    Raises:
	        ValueError: If no file was provided or its extension is unsupported.
	    """
	    if file is None:
	        raise ValueError("No file was uploaded.")

	    # Accept both a plain path and a file-like wrapper carrying the path
	    # in ``.name``.
	    filepath = str(getattr(file, "name", file))
	    # Match extensions case-insensitively so e.g. "REPORT.PDF" is accepted;
	    # the original path is still what gets handed to the agents.
	    lowered = filepath.lower()

	    if lowered.endswith(".pdf"):
	        myagent = agentCollection.agentPDF(filepath)
	    elif lowered.endswith(".json"):
	        myagent = agentCollection.agentFile(filepath)
	    elif lowered.endswith(".docx"):
	        myagent = agentCollection.agentDOCX(filepath)
	    elif lowered.endswith(".txt"):
	        myagent = agentCollection.agentTXT(filepath)
	    elif lowered.endswith(".csv"):
	        # The CSV agent is queried directly and bypasses the crew below.
	        return agentCollection.agentCSV(filepath).run(query)
	    else:
	        # Previously this fell through and failed later on an unbound
	        # ``myagent``; fail here with a message that names the problem.
	        raise ValueError(
	            f"Unsupported file type: {filepath!r}. "
	            "Supported types are CSV, PDF, DOCX, TXT and JSON."
	        )

	    # First task: the file-specific agent researches the query.
	    task = Task(
	        description=f"{query}",
	        expected_output=f"detailed description on {query}",
	        agent=myagent,
	    )

	    # Second task: the writer produces the answer in the requested format.
	    content_writer_agent = agentCollection.agentContentWriter()
	    content_writer_task = Task(
	        description=f"{query}",
	        expected_output=f'{required_ans_format}',
	        agent=content_writer_agent,
	    )

	    crew = Crew(
	        agents=[myagent, content_writer_agent],
	        tasks=[task, content_writer_task],
	        process=Process.sequential,
	        verbose=2,
	    )

	    return crew.kickoff()

	# Gradio front end: one file upload plus two text boxes feed run_ai, and
	# its return value is rendered as plain text.
	interface = gr.Interface(
	    title="DocuSmart",
	    theme=gr.themes.Soft(),
	    fn=run_ai,
	    inputs=[
	        gr.File(label="Upload File"),
	        gr.Textbox(label="Query"),
	        gr.Textbox(label="Expected Output"),
	    ],
	    outputs="text",
	    # Joined with newlines; the two trailing empty items reproduce the
	    # blank line that ends the description.
	    description="\n".join(
	        [
	            "Upload a file (CSV, PDF, DOCX, TXT, JSON) and enter your query to get detailed information.",
	            "",
	            "### Instructions:",
	            "1. Upload the file you want to talk to.",
	            "2. Enter your question in the Query field.",
	            "3. Specify the desired output format, e.g., one line answer.",
	            "4. Press 'Submit' and wait for the response.",
	            "",
	            "",
	        ]
	    ),
	    # Each example row supplies: file, query, expected output format.
	    examples=[
	        ["LabManual.pdf", "What is RIP?", "detailed description"],
	        ["ElectricCarData_Clean.csv", "Which Brand has most vehicles?", "one line answer"],
	    ],
	)

	# Start the web server for the app.
	interface.launch()