| | import os |
| | from dotenv import load_dotenv |
| |
|
| | from scrapegraphai.graphs import SearchGraph |
| | from scrapegraphai.graphs import SmartScraperMultiGraph |
| | from scrapegraphai.graphs import ScriptCreatorGraph |
| |
|
| | from src.databases.redis import REDIS_CACHED |
| | from src.libs.constants import ONE_HOUR_IN_SECONDS |
| | from src.libs.logger import logger |
| |
|
| |
|
# Load environment variables (OPENAI_API_KEY, GROQ_API_KEY, ...) from a local
# .env file BEFORE default_graph_config below reads them via os.getenv.
load_dotenv()

# Project-wide Redis caching decorator, aliased for use on the graph functions.
redis_cache = REDIS_CACHED
| |
|
# Per-provider default configurations for the scrapegraphai graphs.
# Keys are the provider names accepted as the string form of the `config`
# argument in the functions below; "openai" is the implicit fallback.
default_graph_config: dict[str, dict] = {
    "openai": {
        "llm": {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model": "gpt-3.5-turbo",
            # temperature 0 for deterministic, reproducible extraction
            "temperature": 0,
        }
    },
    "groq": {
        "llm": {
            "model": "groq/llama3-70b-8192",
            "api_key": os.getenv("GROQ_API_KEY"),
            "temperature": 0
        },
        # Embeddings are served by a local Ollama instance -- assumes one is
        # running on localhost:11434 (TODO confirm in deployment).
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",
        }
    }
}
| |
|
@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def scrape_graph(prompt: str, sources: str | list[str], config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Scrape the web using a multi-source SmartScraperMultiGraph.

    Args:
        prompt (str): The prompt or query to be used for scraping.
        sources (str | list[str]): The source(s) from which to scrape data.
            It can be a single string or a list of strings.
        config (dict | str, optional): Either a full graph config dict, the
            name of a provider in ``default_graph_config`` ("openai", "groq"),
            or None for the default "openai" configuration.
        cache_ttl (int, optional): Time-to-live (in seconds) for the cached
            result -- presumably consumed by the ``redis_cache`` decorator
            (the body never reads it; TODO confirm). Defaults to one hour.

    Returns:
        str: The scraped data as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known provider name.
    """
    logger.info({'prompt': prompt, 'sources': sources, 'config': config, 'cache_ttl': cache_ttl})

    # Resolve the config: a provider name is looked up in the defaults table;
    # any falsy value (None, empty dict) falls back to the "openai" preset.
    # isinstance() replaces the fragile `type(x) == str and ... or ...` chain.
    if isinstance(config, str):
        config = default_graph_config[config]
    config = config or default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    smart_scraper_graph = SmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=config
    )

    result = smart_scraper_graph.run()
    logger.debug(f"Result: {result}")

    return result
| |
|
@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def search_graph(prompt: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Search the web for relevant information with a SearchGraph.

    Args:
        prompt (str): The prompt or query to be used for searching.
        config (dict | str, optional): Either a full graph config dict, the
            name of a provider in ``default_graph_config`` ("openai", "groq"),
            or None for the default "openai" configuration.
        cache_ttl (int, optional): Time-to-live (in seconds) for the cached
            result -- presumably consumed by the ``redis_cache`` decorator
            (the body never reads it; TODO confirm). Defaults to one hour.

    Returns:
        str: The search results as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known provider name.
    """
    logger.info({'prompt': prompt, 'config': config, 'cache_ttl': cache_ttl})

    # Resolve the config: a provider name is looked up in the defaults table;
    # any falsy value (None, empty dict) falls back to the "openai" preset.
    # isinstance() replaces the fragile `type(x) == str and ... or ...` chain.
    if isinstance(config, str):
        config = default_graph_config[config]
    config = config or default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    search_graph = SearchGraph(
        prompt=prompt,
        config=config,
    )

    result = search_graph.run()
    logger.debug(f"Result: {result}")

    return result
| |
|
@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def _create_script_graph(prompt: str, source: str, library: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Generate a scraping script with a ScriptCreatorGraph.

    Args:
        prompt (str): The prompt or query to be used for generating the script.
        source (str): The source from which to generate the script.
        library (str): The library to be used for generating the script.
        config (dict | str, optional): Either a full graph config dict, the
            name of a provider in ``default_graph_config`` ("openai", "groq"),
            or None for the default "openai" configuration.
        cache_ttl (int, optional): Time-to-live (in seconds) for the cached
            result -- presumably consumed by the ``redis_cache`` decorator
            (the body never reads it; TODO confirm). Defaults to one hour.

    Returns:
        str: The generated script as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known provider name.
    """
    # Resolve the config: a provider name is looked up in the defaults table;
    # any falsy value (None, empty dict) falls back to the "openai" preset.
    # isinstance() replaces the fragile `type(x) == str and ... or ...` chain.
    if isinstance(config, str):
        config = default_graph_config[config]
    config = config or default_graph_config["openai"]

    script_creator_graph = ScriptCreatorGraph(
        prompt=prompt,
        source=source,
        config=config,
        library=library
    )

    result = script_creator_graph.run()

    return result
| |
|