awacke1 committed on
Commit
066fc55
1 Parent(s): c24ee99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -27
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import datetime
4
  import io
5
  import nltk
 
6
  from nltk.tokenize import sent_tokenize
7
  from sklearn.feature_extraction.text import CountVectorizer
8
  from sklearn.decomposition import LatentDirichletAllocation
@@ -15,28 +16,30 @@ def save_text_as_file(text, file_type):
15
  with open(file_name, "w") as file:
16
  file.write(text)
17
  st.success(f"Text saved as {file_name}")
 
18
 
19
- def save_csv_as_excel(text):
20
- try:
21
- df = pd.read_csv(io.StringIO(text), header=None)
22
- if df.iloc[0].dtype == object:
23
- header = 0
24
- file_name = f"csv_with_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
25
- else:
26
- header = None
27
- file_name = f"csv_without_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
28
- df.to_excel(file_name, index=False, header=header)
29
- st.success(f"CSV data saved as {file_name}")
30
- st.dataframe(df)
31
- except pd.errors.EmptyDataError:
32
- st.error("The pasted text does not contain valid CSV data.")
33
- except pd.errors.ParserError as e:
34
- st.error(f"Error parsing CSV data: {str(e)}")
35
- st.info("Please ensure that the pasted text is in a valid CSV format.")
36
-
37
- def split_sentences(text):
38
- sentences = sent_tokenize(text)
39
- return "\n".join(sentences)
 
40
 
41
  def perform_nlp(text):
42
  sentences = sent_tokenize(text)
@@ -68,15 +71,23 @@ def main():
68
  if text_input.strip() == "":
69
  st.warning("Please paste some text.")
70
  else:
71
- if "," in text_input or "\t" in text_input:
72
- save_csv_as_excel(text_input)
 
73
  elif "." in text_input or "!" in text_input or "?" in text_input:
74
- sentences = split_sentences(text_input)
75
- st.subheader("Sentences")
76
- st.write(sentences)
77
  perform_nlp(text_input)
78
  else:
79
- save_text_as_file(text_input, "txt")
 
 
 
 
 
 
 
 
 
80
 
81
  if __name__ == "__main__":
82
  main()
 
3
  import datetime
4
  import io
5
  import nltk
6
+ import base64
7
  from nltk.tokenize import sent_tokenize
8
  from sklearn.feature_extraction.text import CountVectorizer
9
  from sklearn.decomposition import LatentDirichletAllocation
 
16
  with open(file_name, "w") as file:
17
  file.write(text)
18
  st.success(f"Text saved as {file_name}")
19
+ return file_name
20
 
21
def save_list_as_excel(text):
    """Save a pasted "name - description" list as an Excel workbook.

    Each non-blank line is split at the first " - " into a character and
    a description; a line without that separator is stored with an empty
    description. The rows are written to a time-stamped
    ``character_list_*.xlsx`` file (relative path) and a Streamlit
    success message is shown.

    Args:
        text: The pasted list, one entry per line.

    Returns:
        The name of the Excel file that was written.
    """
    rows = []
    for line in text.split("\n"):
        if not line.strip():
            continue
        name, separator, description = line.partition(" - ")
        # Strip both halves: pasted text often has CRLF line endings or
        # padding, and the stray "\r"/spaces would otherwise land in cells.
        rows.append([name.strip(), description.strip() if separator else ""])

    df = pd.DataFrame(rows, columns=["Character", "Description"])
    file_name = f"character_list_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    df.to_excel(file_name, index=False)
    st.success(f"Character list saved as {file_name}")
    return file_name
36
+
37
def get_download_link(file_path):
    """Build an HTML anchor that downloads a local file via a data URI.

    The file is read in binary mode, base64-encoded and embedded directly
    in the link's ``href``.

    Args:
        file_path: Path of the file to embed.

    Returns:
        The ``<a>`` element as a string. The suggested download name and
        the link text are the HTML-escaped base name of ``file_path``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import html
    import os

    with open(file_path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode("ascii")

    # Use only the file name, and escape it: the value is placed inside an
    # HTML attribute and rendered as markup, so quotes, "&" or "<" in the
    # name must not be able to break out of the tag.
    display_name = html.escape(os.path.basename(file_path), quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{display_name}">Download {display_name}</a>'
    )
43
 
44
  def perform_nlp(text):
45
  sentences = sent_tokenize(text)
 
71
  if text_input.strip() == "":
72
  st.warning("Please paste some text.")
73
  else:
74
+ file_name = None
75
+ if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
76
+ file_name = save_list_as_excel(text_input)
77
  elif "." in text_input or "!" in text_input or "?" in text_input:
78
+ file_name = save_text_as_file(text_input, "txt")
 
 
79
  perform_nlp(text_input)
80
  else:
81
+ file_name = save_text_as_file(text_input, "txt")
82
+
83
+ if file_name:
84
+ try:
85
+ df = pd.read_excel(file_name)
86
+ st.subheader("Saved Data")
87
+ st.dataframe(df)
88
+ st.markdown(get_download_link(file_name), unsafe_allow_html=True)
89
+ except:
90
+ pass
91
 
92
  if __name__ == "__main__":
93
  main()