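# Streamlit text-processing app: saves pasted text as txt/csv/md files (or as an
# Excel "character list" when the input looks like a numbered list), runs LDA
# topic modeling and a word-frequency chart on sentence-like input, and lists the
# generated files with embedded download links.
#
# Assumed dependencies (not declared in this file): streamlit, pandas, nltk,
# scikit-learn, and openpyxl (pandas uses it to write .xlsx files).
# Typical launch, assuming the file is saved as app.py:
#   streamlit run app.py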
import streamlit as st
import pandas as pd
import datetime
import io
import nltk
import base64
import os
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Download the sentence tokenizer model at startup
# (recent NLTK releases may also require nltk.download('punkt_tab'))
nltk.download('punkt')

def save_text_as_file(text, file_type):
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"text_file_{current_time}.{file_type}"
    with open(file_name, "w") as file:
        file.write(text)
    st.success(f"Text saved as {file_name}")
    return file_name

def save_list_as_excel(text):
    lines = text.split("\n")
    data = []
    for line in lines:
        if line.strip():
            parts = line.split(" - ", 1)
            if len(parts) == 2:
                data.append(parts)
            else:
                data.append([line.strip(), ""])
    df = pd.DataFrame(data, columns=["Character", "Description"])
    file_name = f"character_list_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    df.to_excel(file_name, index=False)
    st.success(f"Character list saved as {file_name}")
    return file_name
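
# Hypothetical example of input handled by save_list_as_excel above:
#   1. Alice - brave protagonist
#   2. Bob - loyal sidekick
# Each non-empty line is split on " - " into the Character and Description columns.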

# Cache the generated link per file path; the return value is a plain string
@st.cache_data
def get_download_link(file_path):
    try:
        with open(file_path, 'rb') as file:
            data = file.read()
            b64 = base64.b64encode(data).decode()
            file_name = os.path.basename(file_path)
            ext = os.path.splitext(file_name)[1]  # the extension decides the MIME type
            if ext == '.xlsx':
                mime_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            elif ext == '.csv':
                mime_type = 'text/csv'
            elif ext == '.md':
                mime_type = 'text/markdown'
            else:
                mime_type = 'application/octet-stream'  # generic binary fallback
            # Embed the file as a base64 data URI so no server-side route is needed
            href = f'<a href="data:{mime_type};base64,{b64}" download="{file_name}">{file_name}</a>'
            return href
    except Exception:
        # Missing or unreadable files render as an empty link
        return ''

def perform_nlp(text):
    sentences = sent_tokenize(text)
    # Topic Modeling: treat each sentence as a document
    vectorizer = CountVectorizer(stop_words='english')
    X = vectorizer.fit_transform(sentences)
    lda = LatentDirichletAllocation(n_components=3, random_state=42)
    lda.fit(X)
    # Display the top five words for each learned topic
    st.subheader("Topic Modeling")
    feature_names = vectorizer.get_feature_names_out()
    for topic_idx, topic in enumerate(lda.components_):
        st.write(f"Topic {topic_idx + 1}:")
        topic_words = ", ".join(feature_names[i] for i in topic.argsort()[:-6:-1])
        st.write(topic_words)
    # Word Frequency: ten most common whitespace-separated tokens
    word_freq = pd.Series(" ".join(sentences).split()).value_counts()[:10]
    st.subheader("Word Frequency")
    st.bar_chart(word_freq)

def show_files_in_directory():
    st.subheader("Files in Current Directory")
    files = []
    # Collect the generated markdown/Excel/CSV files with size and modification time
    for file in os.listdir("."):
        if file.endswith((".md", ".xlsx", ".csv")):
            file_size = os.path.getsize(file)
            file_modified_time = datetime.datetime.fromtimestamp(os.path.getmtime(file)).strftime("%Y-%m-%d %H:%M:%S")
            files.append({"File Name": get_download_link(file), "Size (bytes)": file_size, "Last Modified": file_modified_time})
    files_df = pd.DataFrame(files)
    # escape=False keeps the embedded <a> download links clickable in the rendered table
    st.write(files_df.to_html(escape=False, index=False), unsafe_allow_html=True)

def main():
    st.title("AI UI for Text Processing")
    text_input = st.text_area("Paste your text here")

    if st.button("Process Text"):
        if text_input.strip() == "":
            st.warning("Please paste some text.")
        else:
            file_name = None
            csv_name = md_name = None
            if text_input.strip().startswith(("1.", "1 -", "1 _")) and "\n" in text_input:
                # Numbered list: save an Excel "character list" plus csv/md copies
                file_name = save_list_as_excel(text_input)
                csv_name = save_text_as_file(text_input, "csv")
                md_name = save_text_as_file(text_input, "md")
            elif "." in text_input or "!" in text_input or "?" in text_input:
                # Sentence-like text: save copies, then run topic modeling and word frequency
                file_name = save_text_as_file(text_input, "txt")
                csv_name = save_text_as_file(text_input, "csv")
                md_name = save_text_as_file(text_input, "md")
                perform_nlp(text_input)
            else:
                file_name = save_text_as_file(text_input, "txt")
                csv_name = save_text_as_file(text_input, "csv")
                md_name = save_text_as_file(text_input, "md")

            if file_name:
                try:
                    # The tabular preview only succeeds when an Excel file was produced
                    df = pd.read_excel(file_name)
                    st.subheader("Saved Data")
                    st.dataframe(df)
                    st.markdown(get_download_link(file_name), unsafe_allow_html=True)
                    st.markdown(get_download_link(csv_name), unsafe_allow_html=True)
                    st.markdown(get_download_link(md_name), unsafe_allow_html=True)
                except Exception:
                    # Plain-text saves have no Excel preview; skip quietly
                    pass

    show_files_in_directory()

if __name__ == "__main__":
    main()