import streamlit as st
import pandas as pd
import datetime
import io
import nltk
import base64
import os
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

def generate_file_name(text, file_type):
    try:
        # Tokenize the text into sentences
        sentences = sent_tokenize(text)

        # Tokenize the sentences into words and remove stopwords
        words = [word.lower() for sentence in sentences for word in word_tokenize(sentence) if word.isalnum()]
        stop_words = set(stopwords.words('english'))
        filtered_words = [word for word in words if word not in stop_words]

        # Count word frequencies and take the top 3 most frequent words
        word_freq = nltk.FreqDist(filtered_words)
        top_words = [word for word, _ in word_freq.most_common(3)]
        if not top_words:
            raise ValueError("no usable words in text")

        # Build the file name from the top words plus a timestamp
        current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"{'-'.join(top_words)}_{current_time}.{file_type}"
    except Exception:
        # Fall back to a generic, timestamped name if anything goes wrong
        current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"text_file_{current_time}.{file_type}"

def save_text_as_file(text, file_type):
    file_name = generate_file_name(text, file_type)
    with open(file_name, "w", encoding="utf-8") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")
    return file_name

def save_list_as_excel(text):
    lines = text.split("\n")
    data = []
    for line in lines:
        if line.strip():
            # Split each entry into name and description on the first " - "
            parts = line.split(" - ", 1)
            if len(parts) == 2:
                data.append(parts)
            else:
                data.append([line.strip(), ""])
    df = pd.DataFrame(data, columns=["Character", "Description"])
    file_name = generate_file_name(text, "xlsx")
    df.to_excel(file_name, index=False)
    st.success(f"Character list saved as {file_name}")
    return file_name

@st.cache_data
def get_download_link(file_path):
    """Return an HTML download link with the file contents embedded as base64."""
    try:
        with open(file_path, 'rb') as file:
            data = file.read()
        b64 = base64.b64encode(data).decode()
        file_name = os.path.basename(file_path)
        ext = os.path.splitext(file_name)[1]  # the extension decides the MIME type
        mime_types = {
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.csv': 'text/csv',
            '.md': 'text/markdown',
            '.txt': 'text/plain',
        }
        mime_type = mime_types.get(ext, 'application/octet-stream')  # generic binary fallback
        return f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'
    except Exception:
        return ''

def perform_nlp(text):
    sentences = sent_tokenize(text)
    # Topic Modeling
    vectorizer = CountVectorizer(stop_words='english')
    try:
        X = vectorizer.fit_transform(sentences)
    except ValueError:
        # Every token was a stop word (or the text was empty), so there is nothing to model
        st.info("Not enough content for topic modeling.")
        return
    lda = LatentDirichletAllocation(n_components=3, random_state=42)
    lda.fit(X)
    feature_names = vectorizer.get_feature_names_out()
    # Display the top words of each topic (lda.components_ holds the topic-word weights)
    st.subheader("Topic Modeling")
    for i, topic in enumerate(lda.components_):
        topic_words = ", ".join(feature_names[j] for j in topic.argsort()[:-6:-1])
        st.write(f"Topic {i + 1}: {topic_words}")
    # Word Frequency
    word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)

def show_files_in_directory():
    st.subheader("Files in Current Directory")
    files = []
    for file in os.listdir("."):
        if file.endswith((".md", ".xlsx", ".csv")):
            file_size = os.path.getsize(file)
            file_modified_time = datetime.datetime.fromtimestamp(os.path.getmtime(file)).strftime("%Y-%m-%d %H:%M:%S")
            files.append({"File Name": get_download_link(file), "Size (bytes)": file_size, "Last Modified": file_modified_time})
    if not files:
        st.write("No saved files found.")
        return
    files_df = pd.DataFrame(files)
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)

def main():
    st.title("AI UI for Text Processing")
    text_input = st.text_area("Paste your text here")

    if st.button("Process Text"):
        if text_input.strip() == "":
            st.warning("Please paste some text.")
        else:
            saved_files = []
            # A numbered "Character - Description" list also gets an Excel sheet
            if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
                saved_files.append(save_list_as_excel(text_input))
                saved_files.append(save_text_as_file(text_input, "csv"))
                saved_files.append(save_text_as_file(text_input, "md"))
            # Prose (anything with sentence punctuation) also gets NLP analysis
            elif "." in text_input or "!" in text_input or "?" in text_input:
                saved_files.append(save_text_as_file(text_input, "txt"))
                saved_files.append(save_text_as_file(text_input, "csv"))
                saved_files.append(save_text_as_file(text_input, "md"))
                perform_nlp(text_input)
            else:
                saved_files.append(save_text_as_file(text_input, "txt"))
                saved_files.append(save_text_as_file(text_input, "csv"))
                saved_files.append(save_text_as_file(text_input, "md"))

            # Preview the spreadsheet when one was created
            if saved_files and saved_files[0].endswith(".xlsx"):
                df = pd.read_excel(saved_files[0])
                st.subheader("Saved Data")
                st.dataframe(df)
            # Offer download links for every file that was just written
            for saved_file in saved_files:
                st.markdown(get_download_link(saved_file), unsafe_allow_html=True)

    show_files_in_directory()

if __name__ == "__main__":
    main()
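
# Usage note (assuming this script is saved as app.py; the file name is illustrative):
#     streamlit run app.py
# Requires streamlit, pandas, nltk, scikit-learn, and openpyxl (for the Excel export).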