File size: 1,776 Bytes
8e8a9fc 749d1d8 8e8a9fc 99dbf0f 285612d 8e8a9fc 749d1d8 e014498 749d1d8 52bca1a d8d1956 52bca1a 749d1d8 fc00c85 8e8a9fc 285612d 8e8a9fc 749d1d8 46a475d 215f172 99dbf0f 749d1d8 8f11653 fc00c85 8f11653 fc00c85 749d1d8 0707bd4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
from pathlib import Path
import gradio as gr
from rich.console import Console
from rich.syntax import Syntax
# Directory containing this script, used to locate bundled assets (the
# diagram under media/). ``__file__`` holds the module's path; ``__name__`` is
# only the module's name, so ``Path(__name__).parent`` always collapsed to "."
# and resolved correctly only when the working directory happened to be the
# project directory.
proj_dir = Path(__file__).parent

# Required configuration: a missing variable raises KeyError at import time.
subreddit = os.environ["SUBREDDIT"]
username = os.environ["USERNAME"]

# Dataset identifier in "<username>/<name>" form; it is interpolated into the
# huggingface.co/datasets/ link shown on the page.
dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"
def log_file_to_html_string():
    """Render the tail of the log file as a syntax-highlighted HTML string.

    Reads the last ``num_lines_visualize`` lines of ``mylog.log``, highlights
    them with rich's ``Syntax`` (Python lexer, Monokai theme, word wrap on),
    prints the result to a recording ``Console`` and exports what was
    recorded as HTML with inline styles.

    Returns:
        str: HTML markup for the highlighted log tail. If the log file does
        not exist, an empty log is rendered instead of raising.
    """
    from collections import deque

    log_file = "mylog.log"
    num_lines_visualize = 50

    try:
        with open(log_file, "rt") as f:
            # A bounded deque keeps only the newest lines while streaming
            # through the file, so the whole log is never held in memory.
            tail = deque(f, maxlen=num_lines_visualize)
    except FileNotFoundError:
        # No log file on disk: show an empty panel rather than failing each
        # time this function is polled.
        tail = ()

    # Highlight the retained lines using the Python lexer and Monokai style.
    output = "".join(tail)
    syntax = Syntax(output, "python", theme="monokai", word_wrap=True)

    # record=True makes the console remember what it printed so that
    # export_html() can replay it as markup.
    # NOTE(review): console.print() also writes to the console's own output
    # stream (stdout by default) on every call - confirm that is intended.
    console = Console(record=True, width=150)
    console.print(syntax)
    return console.export_html(inline_styles=True)
# Introductory Markdown rendered at the top of the page. It links to the
# dataset on the Hugging Face Hub (built from ``dataset_name``) and to the PRAW
# documentation.
markdown = f"""
# Reddit Scraper
This is a reddit scraper which builds [{dataset_name}](https://huggingface.co/datasets/{dataset_name}).
As shown below this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
"""
# JavaScript handed to ``demo.load`` below. It flips the 'dark' class on <body>
# and sets the <gradio-app> background to the theme's primary background
# colour variable.
# NOTE(review): classList.toggle() switches the class off when it is already
# present - confirm whether classList.add('dark') was intended.
_on_load_js = """
() => {
document.body.classList.toggle('dark');
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
}
"""

# Page layout, top to bottom: intro text, pipeline diagram, live log view.
with gr.Blocks() as demo:
    gr.Markdown(markdown)

    diagram_path = proj_dir / 'media' / 'reddit_scraper.drawio.png'
    gr.Image(diagram_path)

    # Passing the function itself (not its result) together with every=1 asks
    # Gradio to re-evaluate it on a 1-second interval; this relies on the
    # queue being enabled at launch.
    output = gr.HTML(log_file_to_html_string, every=1)

    # No Python callback (None): only the JavaScript snippet is attached.
    demo.load(None, _js=_on_load_js)
if __name__ == "__main__":
    # Enable the request queue first, then start the server listening on all
    # interfaces at port 7860, with errors surfaced in the UI.
    queued_demo = demo.queue()
    queued_demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
|