Spaces:
Running
Running
continuing to refine app
Browse files- app.py +49 -7
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,20 +1,62 @@
|
|
1 |
import gradio as gr
|
2 |
from datasets import load_dataset
|
|
|
|
|
3 |
|
4 |
# Load the dataset and convert it to a Pandas dataframe
|
5 |
sotu_dataset = 'jsulz/state-of-the-union-addresses'
|
6 |
dataset = load_dataset(sotu_dataset)
|
7 |
df = dataset['train'].to_pandas()
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
def greet(name):
|
12 |
-
return "Hello " + name + ", you're cool!!"
|
13 |
|
14 |
# Create a Gradio interface with blocks
|
15 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
with gr.Row():
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from datasets import load_dataset
|
3 |
+
from nltk.util import ngrams
|
4 |
+
from collections import Counter
|
5 |
|
6 |
# Load the dataset and convert it to a Pandas dataframe
|
7 |
sotu_dataset = 'jsulz/state-of-the-union-addresses'
|
8 |
dataset = load_dataset(sotu_dataset)
|
9 |
df = dataset['train'].to_pandas()
|
10 |
+
df['word_count'] = df['speech_html'].apply(lambda x: len(x.split()))
|
11 |
+
written = df[df['categories'] == 'Written']
|
12 |
+
spoken = df[df['categories'] == 'Spoken']
|
|
|
|
|
13 |
|
14 |
# Create a Gradio interface with blocks
|
15 |
with gr.Blocks() as demo:
|
16 |
+
gr.Markdown(
|
17 |
+
"""
|
18 |
+
# A Dashboard to Analyze the State of the Union Addresses
|
19 |
+
""")
|
20 |
+
# get all unique president names
|
21 |
+
presidents = df['potus'].unique()
|
22 |
+
# convert presidents to a list
|
23 |
+
presidents = presidents.tolist()
|
24 |
+
# create a dropdown to select a president
|
25 |
+
president = gr.Dropdown(label="Select a President", choices=["All"] + presidents)
|
26 |
with gr.Row():
|
27 |
+
# show the blurb only once a dropdown value has been selected (potus is not None)
|
28 |
+
@gr.render(inputs=president)
def show_text(potus):
    """Render a one-line Markdown blurb for the selected dropdown value.

    Nothing is rendered while *potus* is None.
    """
    if potus is None:
        return
    # NOTE(review): this sentence is emitted for whatever value is selected,
    # including "All" -- it looks like placeholder copy; confirm the
    # intended text before shipping.
    blurb = f"{potus} was the first president of the United States."
    gr.Markdown(blurb)
|
32 |
+
|
33 |
+
@gr.render(inputs=president)
def word_length_bar(potus):
    """Render a bar chart of per-address word counts for the selection.

    Plots the ``word_count`` column against ``date`` for every row when
    *potus* is "All", otherwise only for rows whose ``potus`` equals it.
    """
    # Pick the rows first so the chart itself is built in a single place.
    selection = df if potus == "All" else df[df['potus'] == potus]
    gr.BarPlot(
        selection,
        x="date",
        y="word_count",
        title="Total Number of Words in the Speeches",
    )
|
42 |
+
with gr.Row():
|
43 |
+
|
44 |
+
@gr.render(inputs=president)
def ngram_bar(potus):
    """Render a bar chart of the 20 most common trigrams for the selection.

    Args:
        potus: Value of the president dropdown: a president's name, "All"
            for every address, or None before anything has been selected.

    Nothing is rendered when there is no selection or when the selected
    rows yield no trigrams.
    """
    # Local import: the chart data is built with ``pd.DataFrame`` but the
    # file's visible imports never bring pandas into scope.
    import pandas as pd

    # Nothing selected yet, so there is nothing to chart.
    if potus is None:
        return

    # "All" is the extra dropdown choice prepended to the president names
    # (presumably never a value in the potus column), so filtering on it
    # would select no rows. Use every address, as word_length_bar does.
    potus_df = df if potus == "All" else df[df["potus"] == potus]

    # Accumulate trigram counts in place, one address at a time, instead of
    # building a Counter per row and adding them pairwise.
    trigram_counts = Counter()
    for tokens in potus_df["tokens-nostop"]:
        trigram_counts.update(ngrams(tokens, 3))

    common_trigrams = trigram_counts.most_common(20)
    # No rows matched, or no address had at least three tokens: unpacking
    # an empty result below would fail, so skip the chart.
    if not common_trigrams:
        return

    # Split the (trigram, count) pairs into two parallel sequences.
    trigrams, counts = zip(*common_trigrams)
    # Join each token triple into one space-separated axis label.
    trigrams = [" ".join(trigram) for trigram in trigrams]
    trigrams_df = pd.DataFrame({"trigrams": trigrams, "counts": counts})
    gr.BarPlot(trigrams_df, x="trigrams", y="counts", title="Most Common Trigrams")
|
61 |
|
62 |
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
nltk==3.9.1
|