Spaces:

jsulz
/

sotu-analysis

Running

App Files Files Community

jsulz HF staff committed on Aug 28

Commit

11067be

•

1 Parent(s): 317f161

continuing to refine app

Browse files

Files changed (2) hide show

app.py +49 -7
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,20 +1,62 @@
 import gradio as gr
 from datasets import load_dataset
 # Load the dataset and convert it to a Pandas dataframe
 sotu_dataset = 'jsulz/state-of-the-union-addresses'
 dataset = load_dataset(sotu_dataset)
 df = dataset['train'].to_pandas()
-print(df.head(10))
-def greet(name):
-    return "Hello " + name + ", you're cool!!"
 # Create a Gradio interface with blocks
 with  gr.Blocks() as demo:
     with gr.Row():
-        gr.Markdown("# A Dashboard to Analyze the State of the Union Addresses")
-        demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()

 import gradio as gr
 from datasets import load_dataset
+from nltk.util import ngrams
+from collections import Counter
 # Load the dataset and convert it to a Pandas dataframe
 sotu_dataset = 'jsulz/state-of-the-union-addresses'
 dataset = load_dataset(sotu_dataset)
 df = dataset['train'].to_pandas()
+df['word_count'] = df['speech_html'].apply(lambda x: len(x.split()))
+written = df[df['categories'] == 'Written']
+spoken = df[df['categories'] == 'Spoken']
 # Create a Gradio interface with blocks
 with  gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # A Dashboard to Analyze the State of the Union Addresses
+        """)
+    # get all unique president names
+    presidents = df['potus'].unique()
+    # convert presidents to a list
+    presidents = presidents.tolist()
+    # create a dropdown to select a president
+    president = gr.Dropdown(label="Select a President", choices=["All"] + presidents)
     with gr.Row():
+        # if president is not of type string
+        @gr.render(inputs=president)
+        def show_text(potus):
+            if potus is not None:
+                gr.Markdown(f"{potus} was the first president of the United States.")
+        @gr.render(inputs=president)
+        def word_length_bar(potus):
+            # calculate the total number of words in the speech_html column and add it to a new column
+            # if the president is "All", show the word count for all presidents
+            if potus == "All":
+                gr.BarPlot(df, x="date", y="word_count", title="Total Number of Words in the Speeches")
+            else:
+                # if the president is not "All", show the word count for the selected president
+                gr.BarPlot(df[df['potus'] == potus], x="date", y="word_count", title="Total Number of Words in the Speeches")
+    with gr.Row():
+        @gr.render(inputs=president)
+        def ngram_bar(potus):
+            # create a Counter object from the trigrams
+            potus_df = df[df["potus"] == potus]
+            trigrams = (
+                potus_df["tokens-nostop"].apply(lambda x: list(ngrams(x, 3))).apply(Counter).sum()
+            )
+            # get the most common trigrams
+            common_trigrams = trigrams.most_common(20)
+            # unzip the list of tuples and plot the trigrams and counts as a bar chart
+            trigrams, counts = zip(*common_trigrams)
+            # join the trigrams into a single string
+            trigrams = [" ".join(trigram) for trigram in trigrams]
+            # create a dataframe from the trigrams and counts
+            trigrams_df = pd.DataFrame({"trigrams": trigrams, "counts": counts})
+            # plot the trigrams and counts as a bar chart
+            gr.BarPlot(trigrams_df, x="trigrams", y="counts", title="Most Common Trigrams")
 demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ nltk>=3.9.1,<4