"""Streamlit app that saves pasted text to files and runs light NLP on it.

Pasted text is written to the current working directory in several formats
(plain text or Excel, plus ``.csv`` and ``.md`` copies), optionally analysed
with a small LDA topic model, and offered back as download links.
"""

import base64
import datetime
import html
import io
import os

import nltk
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# Data packages needed by sent_tokenize/word_tokenize and stopwords.words.
nltk.download('punkt')
nltk.download('stopwords')

# File extension -> MIME type used when building download links.
_MIME_TYPES = {
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.csv': 'text/csv',
    '.md': 'text/markdown',
    '.txt': 'text/plain',
}
# Fallback for any other extension: general binary data type.
_DEFAULT_MIME_TYPE = 'application/octet-stream'

# Extensions shown by show_files_in_directory().
_LISTED_EXTENSIONS = (".md", ".xlsx", ".csv")

# Leading markers that make main() treat the input as a numbered list.
_LIST_PREFIXES = ("1.", "1 -", "1 _")


def _top_keywords(text, count=3):
    """Return the ``count`` most frequent non-stop-word tokens of ``text``."""
    sentences = sent_tokenize(text)
    # Lower-case alphanumeric tokens only, so the result is safe in a file name.
    words = [
        word.lower()
        for sentence in sentences
        for word in word_tokenize(sentence)
        if word.isalnum()
    ]
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    word_freq = nltk.FreqDist(filtered_words)
    return [word for word, _ in word_freq.most_common(count)]


def generate_file_name(text, file_type):
    """Build a timestamped file name from the most frequent words in ``text``.

    Args:
        text: Text whose three most frequent non-stop-words form the stem.
        file_type: File extension without the leading dot (e.g. ``"md"``).

    Returns:
        ``"<word>-<word>-<word>_<YYYYmmdd_HHMMSS>.<file_type>"``, or
        ``"text_file_<YYYYmmdd_HHMMSS>.<file_type>"`` when no keyword can be
        extracted (tokenisation failed or the text has no usable words).
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    try:
        top_words = _top_keywords(text)
    except Exception:
        # Naming is best-effort: any tokenizer/corpus failure (for example
        # missing NLTK data) falls back to the default stem instead of
        # preventing the save.
        top_words = []
    # Without this guard an input made only of stop words or punctuation
    # would produce a name starting with a bare underscore.
    stem = "-".join(top_words) if top_words else "text_file"
    return f"{stem}_{current_time}.{file_type}"


def save_text_as_file(text, file_type):
    """Write ``text`` verbatim to a generated file name and report success.

    Args:
        text: Content to write.
        file_type: Extension (without dot) for the generated file name.

    Returns:
        The name of the file that was written, relative to the working
        directory.
    """
    file_name = generate_file_name(text, file_type)
    # Explicit UTF-8 so non-ASCII input does not depend on the platform's
    # default encoding.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")
    return file_name


def save_list_as_excel(text):
    """Parse ``"name - description"`` lines and save them as an Excel sheet.

    Each non-blank line is split on the first ``" - "``; lines without that
    separator get an empty description.

    Args:
        text: Multi-line list text.

    Returns:
        The name of the ``.xlsx`` file that was written.
    """
    data = []
    for line in text.splitlines():
        if not line.strip():
            continue
        parts = line.split(" - ", 1)
        if len(parts) == 2:
            data.append([parts[0].strip(), parts[1].strip()])
        else:
            data.append([line.strip(), ""])
    df = pd.DataFrame(data, columns=["Character", "Description"])
    file_name = generate_file_name(text, "xlsx")
    df.to_excel(file_name, index=False)
    st.success(f"Character list saved as {file_name}")
    return file_name


@st.cache_data(show_spinner=False)
def _read_file_as_base64(file_path, modified_ns, size):
    """Return the base64 text of ``file_path``.

    ``modified_ns`` and ``size`` are not used in the body; they are part of
    the cache key so a rewritten file is re-read instead of served stale.
    """
    with open(file_path, 'rb') as file:
        return base64.b64encode(file.read()).decode("ascii")


def get_download_link(file_path):
    """Return an HTML anchor that downloads ``file_path`` via a data URI.

    Args:
        file_path: Path of the file to embed.

    Returns:
        An ``<a>`` element whose ``href`` holds the base64-encoded file
        content, or ``''`` when the file cannot be read.
    """
    try:
        stat = os.stat(file_path)
        b64 = _read_file_as_base64(file_path, stat.st_mtime_ns, stat.st_size)
    except OSError:
        # Failures are handled outside the cached helper so that an
        # unreadable file is retried on the next call.
        return ''
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]
    mime_type = _MIME_TYPES.get(ext, _DEFAULT_MIME_TYPE)
    # The name ends up inside raw HTML, so escape it.
    safe_name = html.escape(file_name, quote=True)
    return (
        f'<a href="data:{mime_type};base64,{b64}" '
        f'download="{safe_name}">{safe_name}</a>'
    )


def perform_nlp(text):
    """Display LDA topics and a top-10 word-frequency chart for ``text``.

    Args:
        text: Text to analyse; it is split into sentences, and each sentence
            is one document for the topic model.
    """
    sentences = sent_tokenize(text)

    # Topic Modeling
    st.subheader("Topic Modeling")
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # CountVectorizer raises ValueError when no vocabulary remains
        # (e.g. the text contains only stop words).
        st.info("Not enough content for topic modeling.")
    else:
        lda = LatentDirichletAllocation(n_components=3, random_state=42)
        lda.fit(X)
        feature_names = vectorizer.get_feature_names_out()
        # components_ has one row of per-word weights for each topic; the
        # output of transform() is per-sentence and cannot index words.
        for topic_idx, weights in enumerate(lda.components_):
            st.write(f"Topic {topic_idx + 1}:")
            # Five highest-weighted words, heaviest first.
            top_indices = weights.argsort()[:-6:-1]
            st.write(", ".join(feature_names[j] for j in top_indices))

    # Word Frequency
    word_freq = pd.Series(" ".join(sentences).split()).value_counts().head(10)
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)


def show_files_in_directory():
    """Render a table of ``.md``/``.xlsx``/``.csv`` files with download links."""
    st.subheader("Files in Current Directory")
    columns = ["File Name", "Size (bytes)", "Last Modified"]
    files = []
    for file in sorted(os.listdir(".")):
        if not file.endswith(_LISTED_EXTENSIONS):
            continue
        try:
            stat = os.stat(file)
        except OSError:
            # The entry disappeared between listdir() and stat(); skip it.
            continue
        modified = datetime.datetime.fromtimestamp(stat.st_mtime)
        files.append({
            "File Name": get_download_link(file),
            "Size (bytes)": stat.st_size,
            "Last Modified": modified.strftime("%Y-%m-%d %H:%M:%S"),
        })
    # Explicit columns keep the table header when no file matches.
    files_df = pd.DataFrame(files, columns=columns)
    # escape=False keeps the anchor markup in the "File Name" column intact.
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)


def main():
    """Run the Streamlit page: input box, processing button and file list."""
    st.title("AI UI for Text Processing")

    text_input = st.text_area("Paste your text here")

    if st.button("Process Text"):
        stripped = text_input.strip()
        if not stripped:
            st.warning("Please paste some text.")
        else:
            is_list = stripped.startswith(_LIST_PREFIXES) and "\n" in text_input
            has_sentences = any(mark in text_input for mark in ".!?")

            if is_list:
                primary_file = save_list_as_excel(text_input)
            else:
                primary_file = save_text_as_file(text_input, "txt")
            # Keep the names actually written; rebuilding them from the
            # primary name can miss when the timestamp ticks over between
            # saves.
            saved_files = [
                primary_file,
                save_text_as_file(text_input, "csv"),
                save_text_as_file(text_input, "md"),
            ]

            if has_sentences and not is_list:
                perform_nlp(text_input)

            if is_list:
                # Only the list branch writes a workbook that can be read back.
                try:
                    df = pd.read_excel(primary_file)
                except Exception as exc:
                    st.warning(f"Could not load {primary_file}: {exc}")
                else:
                    st.subheader("Saved Data")
                    st.dataframe(df)

            for saved_file in saved_files:
                link = get_download_link(saved_file)
                if link:
                    st.markdown(link, unsafe_allow_html=True)

    show_files_in_directory()


if __name__ == "__main__":
    main()