Text-Extractor / app.py
rajaramesh's picture
Update app.py
46da807 verified
import subprocess
import tempfile
from pathlib import Path
from urllib.parse import parse_qs, urlparse

import gradio as gr
import requests
from langchain_community.document_loaders import UnstructuredURLLoader
from youtube_transcript_api import YouTubeTranscriptApi
def _youtube_video_id(url: str) -> str:
    """Return the video id embedded in a YouTube URL, or "" when there is none.

    Recognised shapes are watch links (``?v=<id>``), ``youtu.be/<id>`` short
    links, and ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>``
    paths on a youtube.com host.
    """
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be":
        return segments[0] if segments else ""

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # parse_qs isolates the id from any trailing parameters (&t=..., &list=...).
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]
        if len(segments) >= 2 and segments[0] in {"shorts", "embed", "live", "v"}:
            return segments[1]

    return ""


def text_extract(generic_url: str) -> str:
    """
    Extract the text from any website or youtube video.

    Args:
        generic_url (str): the url of the website or youtube video to extract text from
    Returns:
        str: A string containing the text extracted from the website or youtube video, or a message describing the problem when extraction fails
    """
    empty_input_message = "Please provide the information to get started"
    if not generic_url or not generic_url.strip():
        print(empty_input_message)
        return empty_input_message

    # Pasted URLs frequently carry stray whitespace or a trailing newline.
    generic_url = generic_url.strip()

    # NOTE(review): the URL is fetched server-side exactly as supplied by the
    # caller; consider restricting reachable hosts if this runs on a private network.
    try:
        # Reachability probe: the status is only reported, extraction is
        # attempted regardless. Connection problems surface as exceptions below.
        response = requests.get(generic_url, timeout=5)
        if response.status_code == 200:
            print("URL is valid and reachable.")
        else:
            print("Unable to reach")

        video_id = _youtube_video_id(generic_url)
        if video_id:
            # NOTE(review): newer youtube-transcript-api releases appear to favour
            # an instance-level fetch() over get_transcript - confirm against the
            # pinned version.
            transcript = YouTubeTranscriptApi.get_transcript(video_id=video_id)
            return " ".join(entry["text"] for entry in transcript)

        # NOTE(review): ssl_verify=False disables certificate checks for the
        # page download; kept as in the original configuration.
        loader = UnstructuredURLLoader(
            urls=[generic_url],
            ssl_verify=False,
            headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"},
        )
        docs = loader.load()
        if not docs:
            # Previously this surfaced as an opaque "list index out of range".
            return "We have encounterd the following error\nNo text could be extracted from the url"

        text = "\n".join(doc.page_content for doc in docs)
        # Drop blank lines and surrounding whitespace from every line.
        cleaned_lines = [line.strip() for line in text.split("\n") if line.strip()]
        return "\n".join(cleaned_lines)
    except requests.exceptions.ConnectionError as e:
        print("Error reaching the URL:", e)
        return "Pls enter valid url we have encounterd ConnectionError\n" + str(e)
    except requests.exceptions.RequestException as e:
        print("Error reaching the URL:", e)
        return "Pls enter valid url we have encounterd RequestException\n" + str(e)
    except Exception as e:
        # Boundary handler: the UI shows the message instead of a traceback.
        print(f"Exception:{e}")
        return "We have encounterd the following error\n" + str(e)
def terminal(command: str) -> str:
    """Execute a terminal command and return the output

    Args:
        command: The command to execute
    Returns:
        The command output (stdout and stderr combined)
    """
    # SECURITY NOTE: the command string is handed to the shell verbatim
    # (shell=True), so whoever can call this function can run arbitrary
    # commands on the host. That is this tool's purpose, but it must only be
    # exposed to trusted callers.
    try:
        completed = subprocess.run(
            command,
            shell=True,
            check=False,  # a non-zero exit status is reported through the output, not raised
            stdin=subprocess.DEVNULL,  # commands that read stdin get EOF instead of blocking on the server's stdin
            capture_output=True,
            text=True,
        )
    except Exception as e:
        return f"Error executing command: {e}"

    # stdout first, then stderr on a new line when there is any.
    if completed.stderr:
        return completed.stdout + "\n" + completed.stderr
    return completed.stdout
import gradio as gr
def generate_file(content: str) -> str:
    """Generates a downloadable text file with the specified content.

    Args:
        content (str): The text to be written into the file.
    Returns:
        str: The file path for downloading.
    """
    # Each call writes into its own freshly created temporary directory so
    # that simultaneous requests cannot overwrite one another's file, while
    # the file itself keeps the name sample.txt. The directory is not removed
    # here.
    target = Path(tempfile.mkdtemp(prefix="text_file_")) / "sample.txt"
    target.write_text("" if content is None else content, encoding="utf-8")
    return str(target)
# One gr.Interface per tool; they are combined into tabs further down.

# URL in, extracted text out.
text_extract_fn = gr.Interface(
    title="Text Extractor",
    description="Extract the text from any website or youtube video.",
    fn=text_extract,
    inputs=gr.Textbox(placeholder="Paste any website or youtube video url"),
    outputs=gr.Textbox(placeholder="Text extracted from website or youtube video"),
    flagging_mode="never",  # no flag button
)

# Shell command in, combined command output out.
terminal_fn = gr.Interface(
    title="Command Prompt",
    description="Runs the terminal commands on your computer.",
    fn=terminal,
    inputs=gr.Textbox(placeholder="Enter you command"),
    outputs=gr.Textbox(placeholder="Command output"),
    flagging_mode="never",  # no flag button
)

# Free text in, downloadable file out.
generate_file_fn = gr.Interface(
    title="Text File Generator",
    description="Create and download a file with your custom content.",
    fn=generate_file,
    inputs=gr.Textbox(label="File Content", placeholder="Enter text to save in file"),
    outputs=gr.File(label="Download your file"),
)
# Page layout: a heading, the featured video, then the three tools as tabs.
_FEATURED_VIDEO_HTML = """<iframe width="560" height="315"
src="https://www.youtube.com/embed/4_O3-jjQExQ"
frameborder="0" allowfullscreen></iframe>"""

with gr.Blocks() as demo:
    gr.Markdown("# Please checkout below video to know about this Gradio MCP Server")
    gr.HTML(_FEATURED_VIDEO_HTML, label="Featured Video")
    gr.TabbedInterface(
        [text_extract_fn, terminal_fn, generate_file_fn],
        ["Text Extractor", "Command Prompt", "File Generator"],
    )

# Start the app (with the MCP server enabled) only when run as a script.
if __name__ == "__main__":
    demo.launch(mcp_server=True)