File size: 2,391 Bytes
8aa27f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import json
import gradio as gr
import pandas as pd
from pyspark.sql import SparkSession

# ── Initialize Spark ─────────────────────────────────────────────────────────
# One session shared by every demo callback below; getOrCreate() hands back an
# already-running session instead of starting a second one.
spark = SparkSession.builder.master("local[*]").appName("HF Spark Demo").getOrCreate()

# ── Demo 1: Word Count ───────────────────────────────────────────────────────
def count_words(text: str) -> str:
    """Count the whitespace-separated words in *text* using Spark.

    Args:
        text: Raw text from the input box. ``None`` and whitespace-only
            input are treated as containing zero words.

    Returns:
        A sentence of the form ``"Your input has N words."``.
    """
    # Strip in Python first: splitting an empty string (or one with leading /
    # trailing blanks) would otherwise produce empty tokens that get counted.
    cleaned = (text or "").strip()
    if not cleaned:
        return "Your input has 0 words."

    # Imported here because the module itself only imports SparkSession.
    from pyspark.sql import functions as F

    df = spark.createDataFrame([(cleaned,)], ["sentence"])
    # Split on runs of whitespace so repeated spaces, tabs and newlines do not
    # inflate the count (the original split on a single ' ' only).
    word_count = F.size(F.split(F.col("sentence"), r"\s+")).alias("word_count")
    row = df.select(word_count).first()
    return f"Your input has {row['word_count']} words."

# ── Demo 2: JSON Data Explorer ───────────────────────────────────────────────
def load_example_json() -> pd.DataFrame:
    """Return the contents of ``example_data.json`` as a pandas DataFrame.

    The file is read through the shared Spark session and then converted
    with ``toPandas()`` so the result can be handed to the Gradio table.
    """
    return spark.read.json("example_data.json").toPandas()

# ── Build Gradio Interface ───────────────────────────────────────────────────
with gr.Blocks() as demo:
    # The heading's fire emoji is written as a Unicode escape so it survives
    # files being re-saved or served with the wrong encoding (it had been
    # mangled into unreadable characters).
    gr.Markdown("## \U0001F525 Spark + Gradio Demo on 0.0.0.0")

    with gr.Tab("Word Count"):
        txt = gr.Textbox(lines=3, placeholder="Type something here...", label="Input Text")
        out = gr.Textbox(label="Word Count Result")
        # Both submitting the textbox and clicking the button run the same
        # callback and write into the same output box.
        txt.submit(count_words, txt, out)
        gr.Button("Count Words").click(count_words, txt, out)

    with gr.Tab("JSON Data Explorer"):
        # The table is populated once while the UI is being built; the button
        # below re-reads the file on demand.
        df_table = gr.Dataframe(
            value=load_example_json(),
            label="Example Data",
            interactive=False,
        )
        gr.Button("Reload Data").click(load_example_json, None, df_table)

# ── Launch ───────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # launch(enable_queue=...) is deprecated and rejected by newer Gradio
    # releases; request queueing is enabled through Blocks.queue() instead.
    demo.queue()
    # Bind to all interfaces on port 7860 so the app is reachable from outside
    # the host/container it runs in.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )