derek-thomas's picture
derek-thomas HF staff
Fixing deprecation warning
0707bd4
raw
history blame
No virus
1.78 kB
import os
from pathlib import Path
import gradio as gr
from rich.console import Console
from rich.syntax import Syntax
# Directory that contains this script. `__file__` is the script's path;
# `__name__` is only the module name (e.g. "__main__"), so building a Path
# from it always produced "." and silently depended on the working directory.
proj_dir = Path(__file__).resolve().parent
# Required configuration taken from the process environment. Both variables
# must be set; a missing one raises KeyError while the module is loading.
subreddit, username = (os.environ[key] for key in ("SUBREDDIT", "USERNAME"))

# Identifier of the dataset this app links to on the Hugging Face Hub.
dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"
def log_file_to_html_string(log_file="mylog.log", num_lines_visualize=50):
    """Render the tail of a log file as self-contained, highlighted HTML.

    Args:
        log_file: Path of the log file to read. A relative path is resolved
            against the current working directory.
        num_lines_visualize: Maximum number of trailing lines to render.

    Returns:
        The HTML produced by ``rich`` for the last ``num_lines_visualize``
        lines, or ``""`` when the log file does not exist yet.

    Raises:
        ValueError: If ``num_lines_visualize`` is negative.
    """
    from collections import deque

    try:
        # errors="replace": the file may be read while another writer is in
        # the middle of a multibyte character; never fail on a partial tail.
        with open(log_file, "rt", errors="replace") as f:
            # A bounded deque keeps only the last N lines while streaming,
            # instead of loading the whole (ever-growing) log into memory.
            tail = deque(f, maxlen=num_lines_visualize)
    except FileNotFoundError:
        # Nothing has been logged yet; show an empty panel rather than
        # raising on every refresh.
        return ""

    # record=True makes the console remember what it prints so that it can
    # be exported as HTML afterwards.
    # NOTE(review): console.print() also writes to the console's own output
    # stream (stdout unless configured otherwise) - confirm that is wanted.
    console = Console(record=True, width=150)

    # Highlight the tail with the Python lexer and the Monokai theme.
    syntax = Syntax("".join(tail), "python", theme="monokai", word_wrap=True)
    console.print(syntax)
    return console.export_html(inline_styles=True)
# Introductory text rendered at the top of the page; links to the dataset
# on the Hugging Face Hub and to the PRAW documentation.
markdown = f"""
# Reddit Scraper
This is a reddit scraper which builds [{dataset_name}](https://huggingface.co/datasets/{dataset_name}).
As shown below this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
"""

# Browser-side snippet run when the page loads: toggles the `dark` class on
# <body> and sets the background colour of the <gradio-app> element.
_DARK_MODE_JS = """
() => {
document.body.classList.toggle('dark');
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
}
"""

with gr.Blocks() as demo:
    gr.Markdown(value=markdown)
    gr.Image(value=proj_dir.joinpath('media', 'reddit_scraper.drawio.png'))
    # The function object itself is passed (not its result), together with
    # `every=1`, so the log view is recomputed periodically.
    output = gr.HTML(value=log_file_to_html_string, every=1)
    # No Python callback on load; only the JavaScript above is executed.
    demo.load(None, _js=_DARK_MODE_JS)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860 and surface errors in the UI.
    launch_options = dict(server_name="0.0.0.0", show_error=True, server_port=7860)
    demo.queue().launch(**launch_options)