Spaces:

ChefPony
/

Agent_Course_Final_Assignment

Sleeping

Agent_Course_Final_Assignment / tools.py

alberto

first commit

7993ea5 10 days ago

4.43 kB

	from typing import Dict
	from transformers import pipeline
	from smolagents.tools import Tool
	import torchcodec


	class VisitWikiPageTool(Tool):
	    """Tool that downloads a page and returns its content converted to markdown.

	    Every request is sent with the ``User-Agent`` header supplied at
	    construction time. Network failures are reported as a returned message
	    string rather than raised.
	    """

	    name = "visit_wikipage"
	    description = (
	        "Visits a Wikipedia page at the given url and reads its content as a markdown string. Use this to browse Wikipedia wepages and get their full content."
	    )
	    inputs = {
	        "url": {
	            "type": "string",
	            "description": "The url of the webpage to visit.",
	        },
	        "max_length": {
	            "type": "integer",
	            "description": "Maximum number of characters to include in the response. Default 40000.",
	            "nullable": True,
	        },
	    }
	    output_type = "string"

	    def __init__(self, user_agent: str):
	        """Store the headers sent with every request.

	        Args:
	            user_agent: Value for the ``User-Agent`` request header.
	        """
	        super().__init__()
	        self.headers = {"User-Agent": user_agent}

	    def _truncate_content(self, content: str, max_length: int) -> str:
	        """Return ``content`` cut to ``max_length`` characters plus a truncation notice.

	        Content that already fits is returned unchanged.
	        """
	        if len(content) > max_length:
	            notice = f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
	            return content[:max_length] + notice
	        return content

	    def forward(self, url: str, max_length: int = 40000) -> str:
	        """Fetch ``url`` and return its HTML rendered as markdown.

	        Args:
	            url: Address of the page to fetch.
	            max_length: Maximum number of characters of markdown to return;
	                ``None`` is treated as 40000.

	        Returns:
	            The (possibly truncated) markdown, or a human-readable error
	            message if the request or the conversion failed.

	        Raises:
	            ImportError: If ``markdownify`` or ``requests`` is not installed.
	        """
	        # Optional dependencies are imported lazily so the module can be
	        # loaded without them.
	        try:
	            import re
	            import requests
	            from markdownify import markdownify
	            from requests.exceptions import RequestException
	        except ImportError as e:
	            raise ImportError(
	                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
	            ) from e

	        # The input is declared nullable, so an explicit None may be passed.
	        limit = 40000 if max_length is None else max_length

	        try:
	            # GET with a 20-second timeout; 4xx/5xx statuses raise.
	            response = requests.get(url, headers=self.headers, timeout=20)
	            response.raise_for_status()

	            page_markdown = markdownify(response.text).strip()
	            # Collapse runs of three or more newlines into one blank line.
	            compacted = re.sub(r"\n{3,}", "\n\n", page_markdown)
	            return self._truncate_content(compacted, limit)

	        # Timeout is a RequestException subclass, so it must be handled first.
	        except requests.exceptions.Timeout:
	            return "The request timed out. Please try again later or check the URL."
	        except RequestException as e:
	            return f"Error fetching the webpage: {str(e)}"
	        except Exception as e:
	            return f"An unexpected error occurred: {str(e)}"

	# NOTE(review): a second class named `SpeechToTextTool` (same tool name
	# "transcriber") is defined further down in this file. If both live at module
	# level, that later definition rebinds the name and this one is shadowed.
	class SpeechToTextTool(Tool):
	    """Tool that transcribes in-memory audio with a speech-recognition pipeline."""

	    name = "transcriber"
	    description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
	    inputs = {
	        "audio": {
	            "type": "audio",
	            "description": "The audio to transcribe it should be bytes.",
	        },
	        "sample_rate": {
	            "type": "integer",
	            "description": "The sampling rate to use to decode the audio, defaults to 16000",
	            "nullable": True,
	        },
	    }
	    output_type = "string"

	    def __init__(self, model: str = "openai/whisper-small"):
	        """Build the ``automatic-speech-recognition`` pipeline for ``model``."""
	        super().__init__()
	        self.pipe = pipeline("automatic-speech-recognition", model=model)

	    def forward(self, audio: bytes, sample_rate: int=16000) -> str:
	        """Decode ``audio`` and return the transcription text.

	        Args:
	            audio: Audio data handed to ``torchcodec``'s ``AudioDecoder``.
	            sample_rate: Sample rate passed to the decoder; ``None`` is
	                treated as 16000.

	        Returns:
	            The ``"text"`` entry of the pipeline output.
	        """
	        if sample_rate is None:
	            sample_rate = 16000
	        audio_decoder = torchcodec.decoders.AudioDecoder(audio, sample_rate=sample_rate)
	        return self.pipe(audio_decoder)["text"]

	# NOTE(review): an earlier class in this file uses the same class name and the
	# same tool name "transcriber"; this later definition is the one the name
	# ends up bound to.
	class SpeechToTextTool(Tool):
	    """Tool that transcribes an audio file on disk with a speech-recognition pipeline."""

	    name = "transcriber"
	    description = "This is a tool that transcribes an audio into text. It returns the transcribed text."
	    inputs = {
	        "audio_file": {
	            "type": "string",
	            "description": "The path to the audio file to transcribe.",
	        },
	        "sample_rate": {
	            "type": "integer",
	            "description": "The sampling rate to use to decode the audio, defaults to 16000",
	            "nullable": True,
	        },
	    }
	    output_type = "string"

	    def __init__(self, model: str = "openai/whisper-small"):
	        """Build the ``automatic-speech-recognition`` pipeline for ``model``."""
	        super().__init__()
	        self.pipe = pipeline("automatic-speech-recognition", model=model)

	    def forward(self, audio_file: str, sample_rate: int = 16000) -> str:
	        """Transcribe the audio file at ``audio_file``.

	        Args:
	            audio_file: Path of the audio file; it is opened in binary mode.
	            sample_rate: Sample rate passed to the decoder; ``None`` is
	                treated as 16000.

	        Returns:
	            The ``"text"`` entry of the pipeline output.

	        Raises:
	            OSError: If the file cannot be opened.
	        """
	        sample_rate = 16000 if sample_rate is None else sample_rate
	        with open(audio_file, "rb") as f:
	            decoder = torchcodec.decoders.AudioDecoder(f, sample_rate=sample_rate)
	            # The decoder was built on the open handle, so the pipeline must
	            # consume it before the `with` block closes the file.
	            out = self.pipe(decoder)
	        return out["text"]