awacke1 committed on
Commit
b7fdd22
1 Parent(s): d44e2e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -46
app.py CHANGED
@@ -1,61 +1,42 @@
1
- import streamlit as st
2
- import re
3
- import nltk
4
- from nltk.corpus import stopwords
5
- from nltk import FreqDist
6
- from graphviz import Digraph
7
-
8
- nltk.download('punkt')
9
- nltk.download('stopwords')
10
-
11
# One or two digits, a colon, two digits and a newline, followed by the
# entire next line (including its terminating newline).
_TIMESTAMP_AND_NEXT_LINE = re.compile(r'\d{1,2}:\d{2}\n.*\n')


def remove_timestamps(text):
    """Return *text* with every timestamp entry removed.

    An entry is a ``M:SS`` / ``MM:SS`` stamp ending its line plus the whole
    line that follows it; both are deleted. The line after the stamp must
    itself end with a newline, otherwise the entry is left untouched.
    """
    return _TIMESTAMP_AND_NEXT_LINE.sub('', text)
13
-
14
def process_text(text):
    """Convert raw text into a Markdown outline.

    Empty lines are discarded. The remaining lines alternate between two
    roles: lines at even positions (0-based) become bold headings
    (``**line**``) and lines at odd positions become bullet items
    (``- line 😄``). Every emitted entry ends with a newline.

    Returns an empty string when *text* has no non-empty lines.
    """
    # Only truly empty strings are dropped; whitespace-only lines are kept.
    kept_lines = [line for line in text.split("\n") if line]

    # Join once instead of growing a string with ``+=`` on every iteration.
    return "".join(
        f"**{line}**\n" if position % 2 == 0 else f"- {line} 😄\n"
        for position, line in enumerate(kept_lines)
    )
30
-
31
def extract_high_information_words(text, top_n=10):
    """Return the *top_n* most frequent content words in *text*.

    The text is tokenized with ``nltk.word_tokenize``; only purely
    alphabetic tokens are kept and they are lower-cased. English stop
    words are then removed and the survivors are ranked by frequency.
    """
    tokens = nltk.word_tokenize(text)
    english_stop_words = set(stopwords.words('english'))

    alphabetic = (token.lower() for token in tokens if token.isalpha())
    frequencies = FreqDist(
        token for token in alphabetic if token not in english_stop_words
    )

    return [token for token, _count in frequencies.most_common(top_n)]
42
 
43
def create_relationship_graph(words):
    """Build a ``Digraph`` that chains *words* in their given order.

    Each word becomes a node whose id is its position (as a string) and
    whose label is the word. Every node after the first is linked from its
    predecessor by an edge labelled with the target node's position.
    """
    chain = Digraph()
    previous_id = None

    for position, word in enumerate(words):
        node_id = str(position)
        chain.node(node_id, word)

        # The first word has no predecessor, so it gets no incoming edge.
        if previous_id is not None:
            chain.edge(previous_id, node_id, label=node_id)
        previous_id = node_id

    return chain
53
 
54
def display_relationship_graph(words):
    """Render the word-chain graph for *words* in the Streamlit page."""
    st.graphviz_chart(create_relationship_graph(words))
57
 
58
- uploaded_file = st.file_uploader("Choose a .txt file", type=['txt'])
 
 
 
 
 
 
 
59
 
60
  if uploaded_file:
61
  file_text = uploaded_file.read().decode("utf-8")
@@ -65,5 +46,12 @@ if uploaded_file:
65
  st.markdown("**Top 10 High Information Words:**")
66
  st.write(top_words)
67
 
 
68
  st.markdown("**Relationship Graph:**")
69
  display_relationship_graph(top_words)
 
 
 
 
 
 
 
1
def extract_context_words(text, high_information_words):
    """Collect the immediate neighbours of every high-information word.

    *text* is tokenized with ``nltk.word_tokenize``. For each token whose
    lower-cased form is in *high_information_words*, a tuple
    ``(before, token, after)`` is recorded, where ``before`` / ``after``
    are the adjacent tokens, or ``None`` at the start / end of the token
    list. The token itself keeps its original casing.

    Returns the list of tuples in order of occurrence.
    """
    tokens = nltk.word_tokenize(text)

    # Build the lookup once; testing against the list would rescan it for
    # every token in the text.
    targets = set(high_information_words)
    last_index = len(tokens) - 1

    context_words = []
    for index, token in enumerate(tokens):
        if token.lower() not in targets:
            continue
        before_word = tokens[index - 1] if index > 0 else None
        after_word = tokens[index + 1] if index < last_index else None
        context_words.append((before_word, token, after_word))

    return context_words
12
 
13
def create_context_graph(context_words):
    """Build a ``Digraph`` showing each high-information word with its context.

    *context_words* is an iterable of ``(before, high, after)`` tuples.
    For the tuple at position ``i`` the graph gets:

    * a box node ``before{i}`` and an edge ``before{i} -> high{i}`` when
      ``before`` is truthy,
    * an ellipse node ``high{i}``,
    * a diamond node ``after{i}`` and an edge ``high{i} -> after{i}`` when
      ``after`` is truthy.

    Nodes are emitted before edges within each tuple.
    """
    graph = Digraph()

    for index, (before_word, high_info_word, after_word) in enumerate(context_words):
        before_id = f'before{index}'
        high_id = f'high{index}'
        after_id = f'after{index}'

        # Plain ``if`` statements instead of conditional expressions that
        # were evaluated only for their side effect.
        if before_word:
            graph.node(before_id, before_word, shape='box')
        graph.node(high_id, high_info_word, shape='ellipse')
        if after_word:
            graph.node(after_id, after_word, shape='diamond')

        if before_word:
            graph.edge(before_id, high_id)
        if after_word:
            graph.edge(high_id, after_id)

    return graph
27
 
28
def display_context_graph(context_words):
    """Render the before/high/after context graph in the Streamlit page."""
    st.graphviz_chart(create_context_graph(context_words))
31
 
32
def display_context_table(context_words):
    """Render the context triples as a three-column Markdown table.

    *context_words* is an iterable of ``(before, high, after)`` tuples.
    Falsy neighbours (e.g. ``None`` at the edges of the text) are shown as
    empty cells. The table is emitted through ``st.markdown``.
    """
    def cell(token):
        # Tokens come from user-supplied text; a literal "|" would be read
        # as a column separator and corrupt the row, so escape it.
        return token.replace("|", "\\|") if token else ""

    rows = [
        "| Before | High Info Word | After |",
        "|--------|----------------|-------|",
    ]
    rows.extend(
        f"| {cell(before)} | {cell(high)} | {cell(after)} |"
        for before, high, after in context_words
    )

    # Single join instead of repeated string concatenation; the trailing
    # newline matches the previous output format.
    st.markdown("\n".join(rows) + "\n")
37
+
38
+
39
+ # ...
40
 
41
  if uploaded_file:
42
  file_text = uploaded_file.read().decode("utf-8")
 
46
  st.markdown("**Top 10 High Information Words:**")
47
  st.write(top_words)
48
 
49
+ context_words = extract_context_words(text_without_timestamps, top_words)
50
  st.markdown("**Relationship Graph:**")
51
  display_relationship_graph(top_words)
52
+
53
+ st.markdown("**Context Graph:**")
54
+ display_context_graph(context_words)
55
+
56
+ st.markdown("**Context Table:**")
57
+ display_context_table(context_words)