jsulz HF staff committed on
Commit
11067be
1 Parent(s): 317f161

continuing to refine app

Browse files
Files changed (2) hide show
  1. app.py +49 -7
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,20 +1,62 @@
1
  import gradio as gr
2
  from datasets import load_dataset
 
 
3
 
4
  # Load the dataset and convert it to a Pandas dataframe
5
  sotu_dataset = 'jsulz/state-of-the-union-addresses'
6
  dataset = load_dataset(sotu_dataset)
7
  df = dataset['train'].to_pandas()
8
-
9
- print(df.head(10))
10
-
11
- def greet(name):
12
- return "Hello " + name + ", you're cool!!"
13
 
14
  # Create a Gradio interface with blocks
15
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
16
  with gr.Row():
17
- gr.Markdown("# A Dashboard to Analyze the State of the Union Addresses")
18
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  demo.launch()
 
1
  import gradio as gr
2
  from datasets import load_dataset
3
+ from nltk.util import ngrams
4
+ from collections import Counter
5
 
6
  # Load the dataset and convert it to a Pandas dataframe
7
  sotu_dataset = 'jsulz/state-of-the-union-addresses'
8
  dataset = load_dataset(sotu_dataset)
9
  df = dataset['train'].to_pandas()
10
+ df['word_count'] = df['speech_html'].apply(lambda x: len(x.split()))
11
+ written = df[df['categories'] == 'Written']
12
+ spoken = df[df['categories'] == 'Spoken']
 
 
13
 
14
  # Create a Gradio interface with blocks
15
  with gr.Blocks() as demo:
16
+ gr.Markdown(
17
+ """
18
+ # A Dashboard to Analyze the State of the Union Addresses
19
+ """)
20
+ # get all unique president names
21
+ presidents = df['potus'].unique()
22
+ # convert presidents to a list
23
+ presidents = presidents.tolist()
24
+ # create a dropdown to select a president
25
+ president = gr.Dropdown(label="Select a President", choices=["All"] + presidents)
26
  with gr.Row():
27
+ # if president is not of type string
28
+ @gr.render(inputs=president)
29
+ def show_text(potus):
30
+ if potus is not None:
31
+ gr.Markdown(f"{potus} was the first president of the United States.")
32
+
33
+ @gr.render(inputs=president)
34
+ def word_length_bar(potus):
35
+ # calculate the total number of words in the speech_html column and add it to a new column
36
+ # if the president is "All", show the word count for all presidents
37
+ if potus == "All":
38
+ gr.BarPlot(df, x="date", y="word_count", title="Total Number of Words in the Speeches")
39
+ else:
40
+ # if the president is not "All", show the word count for the selected president
41
+ gr.BarPlot(df[df['potus'] == potus], x="date", y="word_count", title="Total Number of Words in the Speeches")
42
+ with gr.Row():
43
+
44
+ @gr.render(inputs=president)
45
+ def ngram_bar(potus):
46
+ # create a Counter object from the trigrams
47
+ potus_df = df[df["potus"] == potus]
48
+ trigrams = (
49
+ potus_df["tokens-nostop"].apply(lambda x: list(ngrams(x, 3))).apply(Counter).sum()
50
+ )
51
+ # get the most common trigrams
52
+ common_trigrams = trigrams.most_common(20)
53
+ # unzip the list of tuples and plot the trigrams and counts as a bar chart
54
+ trigrams, counts = zip(*common_trigrams)
55
+ # join the trigrams into a single string
56
+ trigrams = [" ".join(trigram) for trigram in trigrams]
57
+ # create a dataframe from the trigrams and counts
58
+ trigrams_df = pd.DataFrame({"trigrams": trigrams, "counts": counts})
59
+ # plot the trigrams and counts as a bar chart
60
+ gr.BarPlot(trigrams_df, x="trigrams", y="counts", title="Most Common Trigrams")
61
 
62
  demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ nltk=="^3.9.1"