# Import necessary libraries
import streamlit as st
import re
import nltk
import os
from nltk.corpus import stopwords
from nltk import FreqDist
from graphviz import Digraph

# Set page configuration with a title and favicon
st.set_page_config(
    page_title="πŸ“ΊTranscriptπŸ“œEDAπŸ”NLTK",
    page_icon="🌠",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
        'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558"
    }
)

st.markdown('''
- πŸ” **Exploratory Data Analysis (EDA)** πŸ“Š: Dive deep into the sea of data with our EDA feature, unveiling hidden patterns πŸ•΅οΈβ€β™‚οΈ and insights 🧠 in your transcripts. Transform raw data into a treasure trove of information πŸ†.
- πŸ“œ **Natural Language Toolkit (NLTK)** πŸ› οΈ: Harness the power of NLTK to process and understand human language πŸ—£οΈ. From tokenization to sentiment analysis, our toolkit is your compass 🧭 in the vast landscape of natural language processing (NLP).
- πŸ“Ί **Transcript Analysis** πŸ“ˆ: Elevate your text analysis with our advanced transcript analysis tools. Whether it's speech recognition πŸŽ™οΈ or thematic extraction 🌐, turn your audiovisual content into actionable insights πŸ”‘.
''')

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

def remove_timestamps(text):
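    # Remove MM:SS timestamp lines (together with the line that follows each one)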
    return re.sub(r'\d{1,2}:\d{2}\n.*\n', '', text)

def extract_high_information_words(text, top_n=10):
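    # Tokenize, keep alphabetic tokens, drop English stopwords, and return the top_n most frequent words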
    words = nltk.word_tokenize(text)
    words = [word.lower() for word in words if word.isalpha()]
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    freq_dist = FreqDist(filtered_words)
    return [word for word, _ in freq_dist.most_common(top_n)]

def create_relationship_graph(words):
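    # Chain the words in order: one node per word, an edge from each word to the next, labeled by position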
    graph = Digraph()
    for index, word in enumerate(words):
        graph.node(str(index), word)
        if index > 0:
            graph.edge(str(index - 1), str(index), label=str(index))
    return graph

def display_relationship_graph(words):
    graph = create_relationship_graph(words)
    st.graphviz_chart(graph)

def extract_context_words(text, high_information_words):
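    # For each occurrence of a high-information word, record the tokens immediately before and after it (None at boundaries)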
    words = nltk.word_tokenize(text)
    context_words = []
    for index, word in enumerate(words):
        if word.lower() in high_information_words:
            before_word = words[index - 1] if index > 0 else None
            after_word = words[index + 1] if index < len(words) - 1 else None
            context_words.append((before_word, word, after_word))
    return context_words

def create_context_graph(context_words):
    # Link each before-word (box) and after-word (diamond) to its high-information word (ellipse)
    graph = Digraph()
    for index, (before_word, high_info_word, after_word) in enumerate(context_words):
        if before_word:
            graph.node(f'before{index}', before_word, shape='box')
        graph.node(f'high{index}', high_info_word, shape='ellipse')
        if after_word:
            graph.node(f'after{index}', after_word, shape='diamond')
        if before_word:
            graph.edge(f'before{index}', f'high{index}')
        if after_word:
            graph.edge(f'high{index}', f'after{index}')
    return graph

def display_context_graph(context_words):
    graph = create_context_graph(context_words)
    st.graphviz_chart(graph)

def display_context_table(context_words):
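    # Render the (before, word, after) context triples as a Markdown table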
    table = "| Before | High Info Word | After |\n|--------|----------------|-------|\n"
    for before, high, after in context_words:
        table += f"| {before if before else ''} | {high} | {after if after else ''} |\n"
    st.markdown(table)

def showInnovationOutlines():
    st.markdown("""
    
# AI App Areas in Demand and Opportunities for 100x πŸš€

## Creativity + Productivity πŸŽ¨βœ…

| **Area**            | **Opportunity**                                                                 | **Innovation Keywords**                                             |
|---------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------|
| **Content Generation** | Enable consumers to create art, music, videos, or graphics without complex training. | **Bridges creativity and craft**, making imagination a reality.    |
| **Content Editing**    | Automate editing workflows and introduce AI-native edits.                        | **Compose, refine, remix** content seamlessly.                      |
| **Productivity**       | Transform tasks into actions, providing leverage on time.                        | **Executing tasks** and **giving leverage** on time.                |

## High Opportunities for 100x 🌟

### Content Generation

| **What We're Looking For**       | **Details**                                                                                         | **Emoji** |
|----------------------------------|-----------------------------------------------------------------------------------------------------|-----------|
| **Killing the "blank page problem"** | From text prompts to slide decks, generation products that **create content** from "blank pages".  | πŸ“βž‘οΈπŸŒŸ    |
| **Making open source models accessible** | Products that **utilize tech** in the browser or app, making open-source models accessible.         | πŸ’»πŸŒ     |
| **Creating remixable outputs**    | Platforms that allow creators to **make work instantly remixable**, enhancing creativity.           | πŸ”πŸŽ¨     |

### Content Editing

| **What We're Looking For**       | **Details**                                                                                   | **Emoji** |
|----------------------------------|-----------------------------------------------------------------------------------------------|-----------|
| **Owning multi-media workflows** | Workflow products that allow users to **generate, refine, and stitch different content types**. | πŸ–ΌοΈ+🎡    |
| **Enabling in-platform refinement** | AI products that help users **automatically improve** their creations.                        | βœ¨πŸ”§     |
| **Iterating with intelligent editors** | Products that enable users to **refine existing outputs** without starting from scratch.       | πŸ”„βœοΈ    |

### Productivity

| **What We're Looking For**   | **Details**                                                                                         | **Emoji** |
|------------------------------|-----------------------------------------------------------------------------------------------------|-----------|
| **Agents that act as systems of action** | General and specialized agents that **complete tasks**, like booking restaurants or analyzing data. | πŸ€–πŸ’Ό     |
| **Voice-first apps**         | AI apps that prioritize **voice input**, making interaction more natural.                           | πŸ—£οΈπŸ“±    |
| **Apps that provide in-flow assistance** | Tools that **minimize context switching** by offering information and actions within workflow.      | πŸ”„πŸ› οΈ    |

## Companionship + Social πŸ§‘β€πŸ€β€πŸ§‘πŸŽ‰

| **Area**            | **Opportunity**                                                     | **Innovation Keywords**                                      |
|---------------------|---------------------------------------------------------------------|--------------------------------------------------------------|
| **Companionship**   | AI offers an **infinitely patient and engaging friend**.            | **Engaging in conversation** about any topic.                 |
| **Social**          | Enhancing interactions and helping **meet new people**.             | **Fun interactions** and **enhanced matchmaking**.            |

## Personal Growth 🌱

| **Area**            | **Opportunity**                                                     | **Innovation Keywords**                                      |
|---------------------|---------------------------------------------------------------------|--------------------------------------------------------------|
| **Education**       | Personalized learning environments for every consumer.              | **Personalized support** at a lower cost.                     |
| **Personal Finance**| AI-driven financial advice and portfolio management.                | **Money on autopilot** and **self-managing assets**.          |
| **Wellness**        | Judgment-free expert advice for a better future.                    | **Judgment-free experts** and **personalized wellness plans**.|

These tables encapsulate the essence of AI's transformative potential across creativity, productivity, companionship, social engagement, and personal growth. By focusing on these key areas and innovation keywords, we identify the high-impact opportunities where AI can multiply value and redefine experiences.

    
    """)

def load_example_files():
    # Exclude specific files
    excluded_files = {'freeze.txt', 'requirements.txt', 'packages.txt', 'pre-requirements.txt'}
    
    # List all .txt files excluding the ones in excluded_files
    example_files = [f for f in os.listdir() if f.endswith('.txt') and f not in excluded_files]
    
    # Check if there are any files to select from
    if example_files:
        selected_file = st.selectbox("πŸ“„ Select an example file:", example_files)
        if st.button(f"πŸ“‚ Load {selected_file}"):
            with open(selected_file, 'r', encoding="utf-8") as file:
                return file.read()
    else:
        st.write("No suitable example files found.")
    
    return None
    
# Previous version of the example-file loader, kept for reference (not called below)
def load_example_files_old():
    example_files = [f for f in os.listdir() if f.endswith('.txt')]
    selected_file = st.selectbox("πŸ“„ Select an example file:", example_files)
    if st.button(f"πŸ“‚ Load {selected_file}"):
        with open(selected_file, 'r', encoding="utf-8") as file:
            return file.read()
    return None

# Main code for UI
uploaded_file = st.file_uploader("πŸ“ Choose a .txt file", type=['txt'])

example_text = load_example_files()
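# A loaded example file takes precedence over an uploaded file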

if example_text:
    file_text = example_text
elif uploaded_file:
    file_text = uploaded_file.read().decode("utf-8")
else:
    file_text = ""

if file_text:
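    # EDA pipeline: strip timestamps, extract the top 10 words, then show the graphs and context table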
    text_without_timestamps = remove_timestamps(file_text)
    top_words = extract_high_information_words(text_without_timestamps, 10)

    with st.expander("πŸ“Š Top 10 High Information Words"):
        st.write(top_words)

    with st.expander("πŸ“ˆ Relationship Graph"):
        display_relationship_graph(top_words)

    context_words = extract_context_words(text_without_timestamps, top_words)

    with st.expander("πŸ”— Context Graph"):
        display_context_graph(context_words)

    with st.expander("πŸ“‘ Context Table"):
        display_context_table(context_words)

    with st.expander("Innovation Outlines"):
        showInnovationOutlines()