Spaces:
Running
Running
File size: 8,670 Bytes
5d0b799 200e087 69ce619 3c6c367 5f96800 5d0b799 5f96800 5d0b799 b4f8caf c458cbf 5d0b799 ce281ba d03e2fb ce281ba 69ce619 c458cbf 69ce619 e6ead0c e7c7689 116a15a 6d99c81 bebde33 035a290 bebde33 6bb718e e6ead0c 69ce619 e7c7689 a182e49 6d99c81 bebde33 035a290 bebde33 200e087 69ce619 200e087 027b0a4 200e087 1cbdd39 5d62a8f 200e087 027b0a4 68b8132 1cbdd39 027b0a4 200e087 c568cf2 a182e49 5d62a8f 4f9016f 5d62a8f e7c7689 4f9016f 6d99c81 200e087 5d62a8f 4f9016f ce281ba 4f9016f 531af83 116a15a 3dd2e22 6d99c81 4f9016f ce281ba 4f9016f ce281ba 5d62a8f 4f9016f ce281ba 4f9016f 531af83 116a15a 3dd2e22 6d99c81 4f9016f 116a15a a61c9c0 ce281ba 4f9016f ce281ba 4f9016f ce281ba 4f9016f ce281ba 2aba08e 4f9016f ce281ba 4f9016f a61c9c0 4f9016f e7c7689 4f9016f 3c6c367 4f9016f e7c7689 4f9016f 71a5205 4f9016f 5d0b799 5d62a8f 5d0b799 702a17d 5d0b799 5d62a8f 5d0b799 7292b0f 0c26770 5d0b799 5d62a8f 69ce619 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
import os
import gradio as gr
import cohere
import requests
from crewai import Agent, Task, Crew, Process
from langchain_groq import ChatGroq
from langchain_cohere import ChatCohere
from langchain_community.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
from crewai_tools import tool, SeleniumScrapingTool, ScrapeWebsiteTool
from duckduckgo_search import DDGS
from newspaper import Article
# Required credentials: fail at import time if either API key is missing or
# empty, so a misconfigured deployment never gets as far as serving the UI.
cohere_api_key = os.environ.get('COHERE_API_KEY')
if not cohere_api_key:
    raise EnvironmentError("COHERE_API_KEY is not set in environment variables")

groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise EnvironmentError("GROQ_API_KEY is not set in environment variables")

# Module-level Cohere client, reused by the summarization tool below.
co = cohere.Client(cohere_api_key)
print("client ok")
def fetch_content(url):
    """Download and parse the page at ``url`` and return its article text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``text`` attribute of the parsed ``Article``. If anything goes
        wrong while downloading or parsing, the error is printed and a string
        beginning with ``"Error fetching content: "`` is returned instead of
        raising.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        return page.text
    except Exception as exc:
        # Best-effort fetch: report the failure as text so callers always
        # receive a string.
        print(f"ERROR: {exc}")
        return f"Error fetching content: {exc}"
# Define the DuckDuckGoSearch tool
# NOTE(review): per the CrewAI docs the @tool decorator uses the function's
# docstring as the tool description given to the agent, so its wording is
# kept close to the original on purpose.
@tool('DuckDuckGoSearchResults')
def search_results(search_query: str) -> list:
    """
    Performs a web search to gather and return a collection of search results.
    This tool automates the retrieval of web-based information related to a specified query.
    Args:
    - search_query (str): The query string that specifies the information to be searched on the web. This should be a clear and concise expression of the user's information needs.
    Returns:
    - list: A list of dictionaries, where each dictionary represents a search result. Each dictionary includes the 'title' and 'snippet' of the page and the 'link' with the url linking to it.
    """
    # Up to 5 hits, restricted with timelimit='m'. Fall back to an empty list
    # if the search call hands back nothing, so the comprehension never
    # iterates over None.
    hits = DDGS().text(search_query, max_results=5, timelimit='m') or []
    return [
        {"title": hit['title'], "snippet": hit['body'], "link": hit['href']}
        for hit in hits
    ]
@tool('WebScrapper')
def web_scrapper(url: str, topic: str) -> str:
    """
    A tool designed to extract and read the content of a specified link and generate a summary on a specific topic.
    It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content.
    This tool is particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
    Args:
    - url (str): The URL from which to scrape content.
    - topic (str): The specific topic on which to generate a summary.
    Returns:
    - summary (str): summary of the url on the topic
    """
    # Scrape content from the specified URL
    content = fetch_content(url)

    # fetch_content reports failures as an "Error fetching content: ..."
    # string and may also yield empty text. Summarizing either would spend a
    # Cohere call on nothing and invite a made-up summary, so report the
    # failure directly in the same result envelope instead.
    failure = None
    if not content or not content.strip():
        failure = "No readable content could be extracted from this page."
    elif content.startswith("Error fetching content:"):
        failure = content
    if failure is not None:
        return f"###\nSummary:\nNo summary available. {failure}\nURL: {url}\n###\n"

    # Prepare the prompt for generating the summary
    prompt = f"Generate a summary of the following content on the topic ## {topic} ### \n\nCONTENT:\n\n" + content

    # Generate the summary using Cohere
    response = co.chat(
        model='command-r-plus',
        message=prompt,
        temperature=0.4,
        max_tokens=1000,
        chat_history=[],
        prompt_truncation='AUTO',
    )

    # Keep the source URL next to the summary so it can be cited later.
    return f"###\nSummary:\n{response.text}\nURL: {url}\n###\n"
def kickoff_crew(topic: str, model_choice: str) -> str:
    """Run the two-agent research crew on ``topic`` and return the report.

    A Researcher agent (with the search and scraping tools) drafts a report,
    then an Editor agent refines it; the tasks run sequentially.

    Args:
        topic: Subject to research. Must contain non-whitespace text.
        model_choice: Groq model name used for both agents.

    Returns:
        The crew's final output converted to ``str``. On invalid input or any
        exception raised while building or running the crew, a string starting
        with ``"Error:"`` is returned instead of raising.
    """
    # Validate UI input up front so the user gets a clear message rather than
    # an opaque downstream exception (e.g. no model selected yields None).
    cleaned_topic = (topic or "").strip()
    if not cleaned_topic:
        return "Error: please enter a topic to research."
    if not model_choice:
        return "Error: please choose a model before starting the research."

    try:
        # Initialize the large language model based on user selection
        groq_llm = ChatGroq(temperature=0, groq_api_key=groq_api_key, model_name=model_choice)

        # Define Agents with Groq LLM.
        # NOTE(review): the "{topic}" placeholders below are plain strings,
        # not f-strings; presumably CrewAI fills them from the `inputs`
        # passed to kickoff() - confirm against the CrewAI docs.
        researcher = Agent(
            role='Researcher',
            goal='Search and Collect detailed information on topic ## {topic} ##',
            tools=[search_results, web_scrapper],
            llm=groq_llm,  # Assigning the LLM here
            backstory=(
                "You are a meticulous researcher, skilled at navigating vast amounts of information to extract essential insights on any given topic. "
                "Your dedication to detail ensures the reliability and thoroughness of your findings. "
                "With a strategic approach, you carefully analyze and document data, aiming to provide accurate and trustworthy results."
            ),
            allow_delegation=False,
            max_iter=15,
            max_rpm=20,
            memory=True,
            verbose=True
        )
        editor = Agent(
            role='Editor',
            goal='Compile and refine the information into a comprehensive report on topic ## {topic} ##',
            llm=groq_llm,  # Assigning the LLM here
            backstory=(
                "As an expert editor, you specialize in transforming raw data into clear, engaging reports. "
                "Your strong command of language and attention to detail ensure that each report not only conveys essential insights "
                "but is also easily understandable and appealing to diverse audiences. "
            ),
            allow_delegation=False,
            max_iter=5,
            max_rpm=15,
            memory=True,
            verbose=True
        )

        # Define Tasks
        research_task = Task(
            description=(
                "Use the DuckDuckGoSearchResults tool to collect initial search snippets on ## {topic} ##. "
                "If more detailed searches are required, generate and execute new queries related to ## {topic} ##. "
                "Subsequently, employ the WebScrapper tool to delve deeper into significant URLs identified from the snippets, extracting further information and insights. "
                "Compile these findings into a preliminary draft, documenting all relevant sources, titles, and links associated with the topic. "
                "Ensure high accuracy throughout the process and avoid any fabrication or misrepresentation of information."
            ),
            expected_output=(
                "A structured draft report about the topic, featuring an introduction, a detailed main body organized by different aspects of the topic, and a conclusion. "
                "Each section should properly cite sources, providing a thorough overview of the information gathered."
            ),
            agent=researcher
        )
        edit_task = Task(
            description=(
                "Review and refine the initial draft report from the research task. Organize the content logically to enhance information flow. "
                "Verify the accuracy of all data, correct discrepancies, and update information to ensure it reflects current knowledge and is well-supported by sources. "
                "Improve the report’s readability by enhancing language clarity, adjusting sentence structures, and maintaining a consistent tone. "
                "Include a section listing all sources used, formatted as bullet points following this template: "
                # Opening quote added: the template previously had only a
                # dangling closing quote.
                "'- title: url'."
            ),
            expected_output=(
                "A polished, comprehensive report on topic ## {topic} ##, with a clear, professional narrative that accurately reflects the research findings. "
                "The report should include an introduction, an extensive discussion section, a concise conclusion, and a well-organized source list. "
                "Ensure the document is grammatically correct and ready for publication or presentation."
            ),
            agent=editor,
            # The editor works from the researcher's draft.
            context=[research_task]
        )

        # Forming the Crew
        crew = Crew(
            agents=[researcher, editor],
            tasks=[research_task, edit_task],
            process=Process.sequential,
        )

        # Kick-off the research process
        result = crew.kickoff(inputs={'topic': cleaned_topic})
        # kickoff() may return a non-string result object; the UI needs text.
        return str(result)
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}"
def main():
    """Build the Gradio UI for the CrewAI Research Tool and launch it.

    The page has a topic textbox, a model selector, a start button and a
    Markdown area for the result; clicking the button calls ``kickoff_crew``
    with the topic and the chosen model.
    """
    available_models = ["llama3-8b-8192", "llama3-70b-8192", 'llama-3.1-8b-instant', 'llama-3.1-70b-versatile']

    with gr.Blocks() as demo:
        gr.Markdown("## CrewAI Research Tool")
        topic_box = gr.Textbox(label="Enter Topic", placeholder="Type here...")
        model_radio = gr.Radio(choices=available_models, label="Choose Model")
        run_button = gr.Button("Start Research")
        report_view = gr.Markdown(label="Result")

        # Wire the button to the research pipeline.
        run_button.click(
            fn=kickoff_crew,
            inputs=[topic_box, model_radio],
            outputs=report_view,
        )

    # Launch through the queue, configured with api_open=False and max_size=3.
    # For local debugging, demo.launch(debug=True) can be used instead.
    demo.queue(api_open=False, max_size=3).launch()
# Start the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()