derek-thomas's picture
derek-thomas HF staff
Making the app more clear
24c9f40
raw
history blame
No virus
1.86 kB
import os
from pathlib import Path
import gradio as gr
from rich.console import Console
from rich.syntax import Syntax
# Directory that contains this script; bundled assets are resolved relative
# to it. `__file__` is used instead of `__name__`: `__name__` is the module
# name (e.g. "__main__"), so Path(__name__).parent is always "." and only
# works when the process is started from the project directory.
proj_dir = Path(__file__).resolve().parent

# Required configuration. A missing variable raises KeyError at import time.
subreddit = os.environ["SUBREDDIT"]
username = os.environ["USERNAME"]

# Hugging Face dataset repo id that the intro text links to.
dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"
def log_file_to_html_string():
    """Render the tail of the log file as syntax-highlighted HTML.

    Reads the last ``num_lines_visualize`` lines of ``mylog.log`` (resolved
    against the current working directory), highlights them with rich's
    ``Syntax`` using the Python lexer and the Monokai theme, and exports the
    recorded console output as HTML with inline styles.

    Returns:
        str: The HTML produced by ``Console.export_html``. If the log file
        does not exist yet, the HTML for an empty panel is returned instead
        of raising, so a periodic refresh does not fail before the first
        log line has been written.
    """
    from collections import deque

    log_file = "mylog.log"
    num_lines_visualize = 50

    try:
        with open(log_file, "rt") as f:
            # A bounded deque keeps only the newest lines while streaming
            # through the file, so a large log is never held in memory whole.
            lines = deque(f, maxlen=num_lines_visualize)
    except FileNotFoundError:
        lines = ()

    # Highlight the retained tail using the Python lexer and Monokai theme.
    output = "".join(lines)
    syntax = Syntax(output, "python", theme="monokai", word_wrap=True)

    # record=True makes the console remember what was printed so that it can
    # be exported as HTML afterwards.
    console = Console(record=True, width=150)
    console.print(syntax)
    return console.export_html(inline_styles=True)
# Intro text shown at the top of the page (rendered via gr.Markdown).
# It is an f-string: {dataset_name} is interpolated both as the link label
# and into the Hugging Face dataset URL.
markdown = f"""
# Reddit Scraper
This is a reddit scraper which builds and updates [{dataset_name}](https://huggingface.co/datasets/{dataset_name}). Check the README for more details.
As shown in the below diagram this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
"""
# Page layout: intro text, architecture diagram, then the live log viewer.
with gr.Blocks() as demo:
    gr.Markdown(markdown)
    gr.Image(proj_dir / 'media' / 'reddit_scraper.drawio.png')

    # The heading is created before the log panel so that it renders above
    # the content it labels (components appear in creation order).
    gr.Markdown("# Logs")
    # A callable value combined with every=1 asks Gradio to re-run the
    # function once per second and refresh the component with its result.
    output = gr.HTML(log_file_to_html_string, every=1)

    # On page load, run a client-side snippet that toggles the 'dark' class
    # on <body> and aligns the app background with the theme colour.
    demo.load(None,
              _js="""
() => {
document.body.classList.toggle('dark');
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
}
""", )
if __name__ == "__main__":
    # Enable request queueing, then serve on all interfaces at port 7860
    # with errors surfaced in the UI.
    demo.queue().launch(
        server_port=7860,
        server_name="0.0.0.0",
        show_error=True,
    )