Spaces:

reddit-tools-HF
/

dataset-creator-reddit-bestofredditorupdates

Running

App Files Files Community

dataset-creator-reddit-bestofredditorupdates / app.py

derek-thomas's picture

derek-thomas HF staff

Making the app more clear

24c9f40 11 months ago

No virus

1.86 kB

	import os
	from pathlib import Path

	import gradio as gr
	from rich.console import Console
	from rich.syntax import Syntax

	# Directory containing this script; bundled assets (the diagram under
	# media/) are resolved against it.  ``__file__`` is used instead of
	# ``__name__``: the latter is the module name ("__main__" when run as a
	# script), so Path(__name__).parent is always "." and silently depends on
	# the current working directory.
	proj_dir = Path(__file__).parent

	# Required configuration: a missing variable raises KeyError at import time.
	subreddit = os.environ["SUBREDDIT"]
	username = os.environ["USERNAME"]
	# Dataset repo id interpolated into the page description and its Hub link.
	dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"


	def log_file_to_html_string():
	    """Render the tail of the log file as a syntax-highlighted HTML string.

	    Reads the last ``num_lines_visualize`` lines of ``mylog.log`` (resolved
	    against the current working directory), highlights them with rich's
	    ``Syntax`` using the Python lexer and the Monokai theme, prints the
	    result to a recording ``Console`` and returns that console's HTML export.

	    Returns:
	        The string produced by ``console.export_html(inline_styles=True)``.

	    Raises:
	        FileNotFoundError: propagated from ``open`` if the log file is absent.
	    """
	    # Local import keeps this block self-contained without touching the
	    # file-level import section.
	    from collections import deque

	    log_file = "mylog.log"
	    num_lines_visualize = 50

	    console = Console(record=True, width=150)
	    with open(log_file, "rt") as f:
	        # Stream the file through a bounded deque so only the trailing
	        # ``num_lines_visualize`` lines are ever held in memory; this function
	        # is polled repeatedly and the log can grow without bound.
	        lines = deque(f, maxlen=num_lines_visualize)

	    # Highlight the retained tail using the Python lexer and Monokai style.
	    output = "".join(lines)
	    syntax = Syntax(output, "python", theme="monokai", word_wrap=True)

	    # The console was created with record=True, so what is printed here can
	    # be exported as HTML afterwards.
	    console.print(syntax)
	    html_content = console.export_html(inline_styles=True)

	    return html_content


	# Introductory text rendered at the top of the page via gr.Markdown; the
	# dataset name is interpolated both as the link label and into the Hub URL.
	markdown = f"""
	# Reddit Scraper
	This is a reddit scraper which builds and updates [{dataset_name}](https://huggingface.co/datasets/{dataset_name}). Check the README for more details.

	As shown in the below diagram this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
	"""

	with gr.Blocks() as demo:
	    # Components are created in this exact order: description, diagram,
	    # the log view, and finally the "Logs" heading.
	    gr.Markdown(markdown)
	    gr.Image(proj_dir.joinpath("media", "reddit_scraper.drawio.png"))
	    # Passing the function itself (not its result) together with every=1
	    # asks gradio to re-evaluate it on a 1-second interval.
	    output = gr.HTML(value=log_file_to_html_string, every=1)
	    gr.Markdown("# Logs")
	    # No Python callback on page load, only the JS snippet below, which
	    # toggles the 'dark' class on <body> and sets the app background colour.
	    demo.load(
	        fn=None,
	        _js="""
	() => {
	document.body.classList.toggle('dark');
	document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
	}
	""",
	    )

	if __name__ == "__main__":
	    # Enable the queue before launching, then serve on all interfaces at
	    # port 7860 with errors surfaced in the UI.
	    demo.queue().launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        show_error=True,
	    )