awacke1's picture
Update app.py
066fc55 verified
raw
history blame
No virus
3.21 kB
import streamlit as st
import pandas as pd
import datetime
import io
import nltk
import base64
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
# Fetch the NLTK 'punkt' data when the module is loaded; the only NLTK call in
# this file is sent_tokenize, used by perform_nlp.
# NOTE(review): presumably sent_tokenize needs this resource, and newer NLTK
# releases may look for 'punkt_tab' instead — confirm against the installed
# NLTK version.
nltk.download('punkt')
def save_text_as_file(text, file_type):
    """Write *text* to a timestamped file in the current working directory.

    Args:
        text: The string to write.
        file_type: File extension without the leading dot (e.g. ``"txt"``).

    Returns:
        The generated file name, ``text_file_<YYYYmmdd_HHMMSS>.<file_type>``.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"text_file_{current_time}.{file_type}"
    # Pin the encoding: pasted text routinely contains non-ASCII characters,
    # and the platform default codec (e.g. cp1252) may raise
    # UnicodeEncodeError or produce files that differ between hosts.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")
    return file_name
def save_list_as_excel(text):
    """Parse a "name - description" list and save it as an Excel workbook.

    Every non-blank line of *text* becomes one row. A line is split at its
    first ``" - "`` into the "Character" and "Description" columns; a line
    without that separator is stored stripped, with an empty description.

    Args:
        text: Newline-separated list text.

    Returns:
        The generated file name, ``character_list_<YYYYmmdd_HHMMSS>.xlsx``.
    """

    def _to_row(raw_line):
        # partition() splits on the first separator only, like split(sep, 1).
        name, separator, description = raw_line.partition(" - ")
        if separator:
            return [name, description]
        return [raw_line.strip(), ""]

    rows = [_to_row(raw) for raw in text.split("\n") if raw.strip()]
    table = pd.DataFrame(rows, columns=["Character", "Description"])

    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    file_name = f"character_list_{stamp}.xlsx"
    table.to_excel(file_name, index=False)
    st.success(f"Character list saved as {file_name}")
    return file_name
def get_download_link(file_path):
    """Return an HTML anchor that downloads *file_path* via a base64 data URI.

    The file is read in binary mode and its contents are embedded in the
    ``href`` as ``data:application/octet-stream;base64,...``.

    Args:
        file_path: Path of the file to embed.

    Returns:
        An ``<a>`` element string whose ``download`` attribute and visible
        label are the HTML-escaped base name of *file_path*.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope imports keep this block self-contained; the module's
    # top-level import list does not include these two names.
    import html
    import os

    with open(file_path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode("ascii")

    # Use only the base name so directory components do not leak into the
    # suggested file name, and escape it so quotes or angle brackets in the
    # name cannot break out of the attribute or inject markup.
    display_name = html.escape(os.path.basename(file_path), quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{display_name}">Download {display_name}</a>'
    )
def perform_nlp(text, *, n_topics=3, n_top_words=5):
    """Render LDA topics and a word-frequency chart for *text* in Streamlit.

    The text is split into sentences, each sentence is treated as one
    document for a bag-of-words LDA model, and the highest-weighted words of
    every topic are written out. A bar chart of the ten most frequent
    whitespace-separated tokens follows.

    Args:
        text: Raw input text.
        n_topics: Number of LDA topics to fit.
        n_top_words: Number of highest-weighted words to list per topic.

    Returns:
        None. All output is written through Streamlit calls.
    """
    sentences = sent_tokenize(text)

    # Topic Modeling
    st.subheader("Topic Modeling")
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # CountVectorizer raises ValueError when the vocabulary is empty
        # (e.g. the text consists solely of stop words); skip topic modeling
        # instead of crashing the whole page.
        st.warning("Not enough distinct words for topic modeling.")
    else:
        lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
        lda.fit(X)
        feature_names = vectorizer.get_feature_names_out()
        # components_ holds one row of word weights per topic. Iterating the
        # per-sentence output of transform() instead would index the
        # vocabulary with topic ids and list meaningless words.
        for topic_idx, word_weights in enumerate(lda.components_):
            top_indices = word_weights.argsort()[::-1][:n_top_words]
            st.write(f"Topic {topic_idx + 1}:")
            st.write(", ".join(feature_names[j] for j in top_indices))

    # Word Frequency
    word_freq = pd.Series(" ".join(sentences).split()).value_counts().head(10)
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)
def main():
    """Streamlit page: accept pasted text, save it, and offer it for download.

    When "Process Text" is pressed the input is routed as follows:

    * text starting with ``1.``, ``1 -`` or ``1 _`` that contains a newline
      is treated as a list and saved through ``save_list_as_excel``;
    * anything else is saved as a ``.txt`` file, and ``perform_nlp`` is also
      run when the text contains ``.``, ``!`` or ``?``.

    A saved ``.xlsx`` file is previewed as a table, and a download link is
    rendered for every saved file.
    """
    st.title("AI UI for Text Processing")
    text_input = st.text_area("Paste your text here")
    if not st.button("Process Text"):
        return

    stripped = text_input.strip()
    if not stripped:
        st.warning("Please paste some text.")
        return

    if stripped.startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
        file_name = save_list_as_excel(text_input)
    else:
        file_name = save_text_as_file(text_input, "txt")
        if any(mark in text_input for mark in ".!?"):
            perform_nlp(text_input)

    if not file_name:
        return

    # Only Excel output can be previewed as a table. Attempting read_excel on
    # a .txt file always fails, which previously also suppressed the download
    # link because both lived in one try block with a bare except.
    if file_name.endswith(".xlsx"):
        try:
            df = pd.read_excel(file_name)
        except (ImportError, OSError, ValueError) as exc:
            st.warning(f"Could not preview {file_name}: {exc}")
        else:
            st.subheader("Saved Data")
            st.dataframe(df)

    try:
        link = get_download_link(file_name)
    except OSError as exc:
        st.error(f"Could not create a download link for {file_name}: {exc}")
    else:
        st.markdown(link, unsafe_allow_html=True)
# Script entry point: build the Streamlit page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()