Spaces:

awacke1
/

NLPTopicModelerProcess-xlsx-csv-md

Running

App Files Files Community

awacke1 committed on May 7

Commit

066fc55

•

1 Parent(s): c24ee99

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -27

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import pandas as pd
 import datetime
 import io
 import nltk
 from nltk.tokenize import sent_tokenize
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
@@ -15,28 +16,30 @@ def save_text_as_file(text, file_type):
     with open(file_name, "w") as file:
         file.write(text)
     st.success(f"Text saved as {file_name}")
-def save_csv_as_excel(text):
-    try:
-        df = pd.read_csv(io.StringIO(text), header=None)
-        if df.iloc[0].dtype == object:
-            header = 0
-            file_name = f"csv_with_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
-        else:
-            header = None
-            file_name = f"csv_without_header_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
-        df.to_excel(file_name, index=False, header=header)
-        st.success(f"CSV data saved as {file_name}")
-        st.dataframe(df)
-    except pd.errors.EmptyDataError:
-        st.error("The pasted text does not contain valid CSV data.")
-    except pd.errors.ParserError as e:
-        st.error(f"Error parsing CSV data: {str(e)}")
-        st.info("Please ensure that the pasted text is in a valid CSV format.")
-def split_sentences(text):
-    sentences = sent_tokenize(text)
-    return "\n".join(sentences)
 def perform_nlp(text):
     sentences = sent_tokenize(text)
@@ -68,15 +71,23 @@ def main():
         if text_input.strip() == "":
             st.warning("Please paste some text.")
         else:
-            if "," in text_input or "\t" in text_input:
-                save_csv_as_excel(text_input)
             elif "." in text_input or "!" in text_input or "?" in text_input:
-                sentences = split_sentences(text_input)
-                st.subheader("Sentences")
-                st.write(sentences)
                 perform_nlp(text_input)
             else:
-                save_text_as_file(text_input, "txt")
 if __name__ == "__main__":
     main()

 import datetime
 import io
 import nltk
+import base64
 from nltk.tokenize import sent_tokenize
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
     with open(file_name, "w") as file:
         file.write(text)
     st.success(f"Text saved as {file_name}")
+    return file_name
+def save_list_as_excel(text):
+    lines = text.split("\n")
+    data = []
+    for line in lines:
+        if line.strip():
+            parts = line.split(" - ", 1)
+            if len(parts) == 2:
+                data.append(parts)
+            else:
+                data.append([line.strip(), ""])
+    df = pd.DataFrame(data, columns=["Character", "Description"])
+    file_name = f"character_list_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+    df.to_excel(file_name, index=False)
+    st.success(f"Character list saved as {file_name}")
+    return file_name
+def get_download_link(file_path):
+    with open(file_path, 'rb') as f:
+        data = f.read()
+    b64 = base64.b64encode(data).decode()
+    href = f'<a href="data:application/octet-stream;base64,{b64}" download="{file_path}">Download {file_path}</a>'
+    return href
 def perform_nlp(text):
     sentences = sent_tokenize(text)
         if text_input.strip() == "":
             st.warning("Please paste some text.")
         else:
+            file_name = None
+            if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
+                file_name = save_list_as_excel(text_input)
             elif "." in text_input or "!" in text_input or "?" in text_input:
+                file_name = save_text_as_file(text_input, "txt")
                 perform_nlp(text_input)
             else:
+                file_name = save_text_as_file(text_input, "txt")
+            if file_name:
+                try:
+                    df = pd.read_excel(file_name)
+                    st.subheader("Saved Data")
+                    st.dataframe(df)
+                    st.markdown(get_download_link(file_name), unsafe_allow_html=True)
+                except:
+                    pass
 if __name__ == "__main__":
     main()