awacke1 committed
Commit 3caaeaf
1 Parent(s): 8d0cab2

Update backup.app.py

Files changed (1)
  backup.app.py +60 -28
backup.app.py CHANGED
@@ -1,6 +1,7 @@
  import streamlit as st
  import re
  import nltk
+ import os
  from nltk.corpus import stopwords
  from nltk import FreqDist
  from graphviz import Digraph
@@ -9,56 +10,80 @@ nltk.download('punkt')
  nltk.download('stopwords')

  def remove_timestamps(text):
-     return re.sub(r'\d{1,2}:\d{2}\n.*\n', '', text)  # Updated regex pattern
-
- def process_text(text):
-     lines = text.split("\n")
-     processed_lines = []
-
-     for line in lines:
-         if line:
-             processed_lines.append(line)
-
-     outline = ""
-     for i, line in enumerate(processed_lines):
-         if i % 2 == 0:
-             outline += f"**{line}**\n"
-         else:
-             outline += f"- {line} 😄\n"
-
-     return outline
+     return re.sub(r'\d{1,2}:\d{2}\n.*\n', '', text)

  def extract_high_information_words(text, top_n=10):
      words = nltk.word_tokenize(text)
      words = [word.lower() for word in words if word.isalpha()]
-
      stop_words = set(stopwords.words('english'))
      filtered_words = [word for word in words if word not in stop_words]
-
      freq_dist = FreqDist(filtered_words)
-     high_information_words = [word for word, _ in freq_dist.most_common(top_n)]
-
-     return high_information_words
+     return [word for word, _ in freq_dist.most_common(top_n)]

  def create_relationship_graph(words):
      graph = Digraph()
-
      for index, word in enumerate(words):
          graph.node(str(index), word)
-
          if index > 0:
              graph.edge(str(index - 1), str(index), label=str(index))
-
      return graph

  def display_relationship_graph(words):
      graph = create_relationship_graph(words)
      st.graphviz_chart(graph)

+ def extract_context_words(text, high_information_words):
+     words = nltk.word_tokenize(text)
+     context_words = []
+     for index, word in enumerate(words):
+         if word.lower() in high_information_words:
+             before_word = words[index - 1] if index > 0 else None
+             after_word = words[index + 1] if index < len(words) - 1 else None
+             context_words.append((before_word, word, after_word))
+     return context_words
+
+ def create_context_graph(context_words):
+     graph = Digraph()
+     for index, (before_word, high_info_word, after_word) in enumerate(context_words):
+         graph.node(f'before{index}', before_word, shape='box') if before_word else None
+         graph.node(f'high{index}', high_info_word, shape='ellipse')
+         graph.node(f'after{index}', after_word, shape='diamond') if after_word else None
+         if before_word:
+             graph.edge(f'before{index}', f'high{index}')
+         if after_word:
+             graph.edge(f'high{index}', f'after{index}')
+     return graph
+
+ def display_context_graph(context_words):
+     graph = create_context_graph(context_words)
+     st.graphviz_chart(graph)
+
+ def display_context_table(context_words):
+     table = "| Before | High Info Word | After |\n|--------|----------------|-------|\n"
+     for before, high, after in context_words:
+         table += f"| {before if before else ''} | {high} | {after if after else ''} |\n"
+     st.markdown(table)
+
+ def load_example_files():
+     example_files = [f for f in os.listdir() if f.endswith('.txt')]
+     selected_file = st.selectbox("Select an example file:", example_files)
+     if st.button(f"Load {selected_file}"):
+         with open(selected_file, 'r', encoding="utf-8") as file:
+             return file.read()
+     return None
+
  uploaded_file = st.file_uploader("Choose a .txt file", type=['txt'])

- if uploaded_file:
+ example_text = load_example_files()
+
+ if example_text:
+     file_text = example_text
+ elif uploaded_file:
      file_text = uploaded_file.read().decode("utf-8")
+ else:
+     file_text = ""
+
+ if file_text:
      text_without_timestamps = remove_timestamps(file_text)

      top_words = extract_high_information_words(text_without_timestamps, 10)
@@ -66,4 +91,11 @@ if uploaded_file:
      st.write(top_words)

      st.markdown("**Relationship Graph:**")
-     display_relationship_graph(top_words)
+     display_relationship_graph(top_words)
+
+     context_words = extract_context_words(text_without_timestamps, top_words)
+     st.markdown("**Context Graph:**")
+     display_context_graph(context_words)
+
+     st.markdown("**Context Table:**")
+     display_context_table(context_words)
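
For quick reference, a minimal sketch (not taken from the repository) of the (before, word, after) triples produced by the extract_context_words helper added in this commit. It assumes the NLTK 'punkt' tokenizer data can be downloaded and inlines the same logic so the snippet runs standalone:

import nltk

nltk.download('punkt', quiet=True)

def extract_context_words(text, high_information_words):
    # Same logic as the helper in backup.app.py: pair each high-information
    # word with its immediate neighbours in the token stream.
    words = nltk.word_tokenize(text)
    context_words = []
    for index, word in enumerate(words):
        if word.lower() in high_information_words:
            before_word = words[index - 1] if index > 0 else None
            after_word = words[index + 1] if index < len(words) - 1 else None
            context_words.append((before_word, word, after_word))
    return context_words

print(extract_context_words("the quick brown fox jumps", ["fox"]))
# Expected output: [('brown', 'fox', 'jumps')]

In the app, these triples feed both create_context_graph (one box/ellipse/diamond node group per match) and the markdown table rendered by display_context_table.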