Spaces:

reddit-tools-HF
/

dataset-creator-reddit-bestofredditorupdates

Running

dataset-creator-reddit-bestofredditorupdates

File size: 1,776 Bytes

8e8a9fc
 
 
749d1d8
 
 
 
8e8a9fc
 
99dbf0f
 
285612d
8e8a9fc
749d1d8
 
 
e014498
749d1d8
 
 
52bca1a
 
 
d8d1956
52bca1a
 
 
 
749d1d8
 
 
 
 
 
fc00c85
8e8a9fc
 
 
 
285612d
8e8a9fc
749d1d8
46a475d
215f172
99dbf0f
749d1d8
8f11653
fc00c85
8f11653
 
 
 
fc00c85
749d1d8
 
0707bd4

import os
from collections import deque
from pathlib import Path

import gradio as gr
from rich.console import Console
from rich.syntax import Syntax

# Directory that contains this file; used to locate bundled assets (media/).
# ``__file__`` is required here: ``__name__`` is only the module name (e.g.
# "__main__"), so ``Path(__name__).parent`` always collapsed to ``Path(".")``
# and silently depended on the current working directory.
proj_dir = Path(__file__).resolve().parent

# Required configuration: a missing variable raises KeyError at import time.
subreddit = os.environ["SUBREDDIT"]
username = os.environ["USERNAME"]
# Identifier of the dataset this app reports on, in "<user>/<name>" form.
dataset_name = f"{username}/dataset-creator-reddit-{subreddit}"


def log_file_to_html_string():
    """Render the tail of the log file as syntax-highlighted HTML.

    Reads the last ``num_lines_visualize`` lines of ``mylog.log`` (resolved
    against the current working directory), highlights them with rich's
    "python" lexer and the "monokai" theme, and exports the rendering as HTML
    with inline styles.

    Returns:
        str: The HTML document produced by ``Console.export_html``. When the
        log file does not exist yet, the rendering is of an empty string.
    """
    log_file = "mylog.log"
    num_lines_visualize = 50

    try:
        # errors="replace": a stray undecodable byte must not break the viewer.
        # NOTE(review): assumes the log is written as UTF-8 -- confirm against
        # the process that produces it.
        with open(log_file, "rt", encoding="utf-8", errors="replace") as f:
            # A bounded deque streams the file and retains only the newest
            # lines, instead of materialising every line of a growing log.
            lines = deque(f, maxlen=num_lines_visualize)
    except FileNotFoundError:
        # This function is polled by the UI; presumably the log is created by
        # another process and may not exist during the first few polls.
        lines = ()

    # Highlight the retained tail using the Python lexer and Monokai style.
    output = "".join(lines)
    syntax = Syntax(output, "python", theme="monokai", word_wrap=True)

    # record=True makes the console keep what it prints so it can be exported;
    # the print itself still goes to the console's regular output stream too.
    console = Console(record=True, width=150)
    console.print(syntax)
    return console.export_html(inline_styles=True)


# Introductory text rendered at the top of the page; links to the dataset.
markdown = f"""
# Reddit Scraper
This is a reddit scraper which builds [{dataset_name}](https://huggingface.co/datasets/{dataset_name}).

As shown below this space pulls data from reddit via [PRAW](https://praw.readthedocs.io/en/stable/), processes it, and puts it in a corresponding dataset.
"""

# Browser-side snippet run on page load: toggles the "dark" class on <body>
# and sets the gradio-app background to the theme's primary background color.
_TOGGLE_DARK_JS = """
        () => {
            document.body.classList.toggle('dark');
            document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
        }
        """

# Page layout: intro text, architecture diagram, then the live log panel.
with gr.Blocks() as demo:
    gr.Markdown(value=markdown)
    gr.Image(value=proj_dir / "media" / "reddit_scraper.drawio.png")
    # A callable value combined with `every=1` re-evaluates it each second.
    output = gr.HTML(value=log_file_to_html_string, every=1)
    # No Python callback on load; only the JavaScript above is executed.
    demo.load(fn=None, _js=_TOGGLE_DARK_JS)

if __name__ == "__main__":
    # The queue is enabled before launching, as in the original one-liner.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )