# Import necessary libraries
import os
import re

import nltk
import streamlit as st
from graphviz import Digraph
from nltk import FreqDist
from nltk.corpus import stopwords

# Set page configuration with a title and favicon.
# NOTE(review): the emoji in the user-facing strings of this file look
# mis-encoded (UTF-8 read through a single-byte code page). They are left
# untouched here -- confirm against the encoding of the original file.
st.set_page_config(
    page_title="πŸ“ΊTranscriptπŸ“œEDAπŸ”NLTK",
    page_icon="🌠",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
        'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558"
    }
)

st.markdown('''πŸ” **Exploratory Data Analysis (EDA)** πŸ“Š:
- Dive deep into the sea of data with our EDA feature, unveiling hidden patterns πŸ•΅οΈβ€β™‚οΈ and insights 🧠 in your transcripts. Transform raw data into a treasure trove of information πŸ†.

πŸ“œ **Natural Language Toolkit (NLTK)** πŸ› οΈ:
- Harness the power of NLTK to process and understand human language πŸ—£οΈ. From tokenization to sentiment analysis, our toolkit is your compass 🧭 in the vast landscape of natural language processing (NLP).

πŸ“Ί **Transcript Analysis** πŸ“ˆ:
- Elevate your text analysis with our advanced transcript analysis tools. Whether it's speech recognition πŸŽ™οΈ or thematic extraction 🌐, turn your audiovisual content into actionable insights πŸ”‘.''')


def _ensure_nltk_resource(resource_path, package):
    """Download the NLTK *package* only if *resource_path* is not installed.

    Streamlit re-executes this script on every interaction, so an
    unconditional ``nltk.download`` would contact the download server on
    each rerun.
    """
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)


# Download NLTK resources (only when missing)
_ensure_nltk_resource('tokenizers/punkt', 'punkt')
_ensure_nltk_resource('corpora/stopwords', 'stopwords')


def remove_timestamps(text):
    """Remove timestamp entries from a transcript and return the result.

    Every occurrence of ``M:SS`` / ``MM:SS`` that is immediately followed by
    a newline is removed together with the entire following line.

    NOTE(review): because the pattern also consumes the line after the
    timestamp, a transcript laid out as "timestamp line, text line" loses
    its text as well. Behavior is kept as-is; confirm the expected
    transcript format before changing the pattern.
    """
    return re.sub(r'\d{1,2}:\d{2}\n.*\n', '', text)


def extract_high_information_words(text, top_n=10):
    """Return the *top_n* most frequent non-stopword words in *text*.

    Tokens are produced by ``nltk.word_tokenize``; only purely alphabetic
    tokens are kept, lower-cased, and English stopwords are dropped.
    The result is a list of words ordered from most to least frequent.
    """
    stop_words = set(stopwords.words('english'))
    lowered = (token.lower() for token in nltk.word_tokenize(text) if token.isalpha())
    freq_dist = FreqDist(word for word in lowered if word not in stop_words)
    return [word for word, _ in freq_dist.most_common(top_n)]


def create_relationship_graph(words):
    """Build a ``Digraph`` that chains *words* in order.

    Each word becomes a node named by its position; consecutive nodes are
    joined by an edge labelled with the position of the target node.
    """
    graph = Digraph()
    for index, word in enumerate(words):
        graph.node(str(index), word)
        if index > 0:
            graph.edge(str(index - 1), str(index), label=str(index))
    return graph


def display_relationship_graph(words):
    """Render the relationship graph for *words* in the Streamlit page."""
    st.graphviz_chart(create_relationship_graph(words))


def extract_context_words(text, high_information_words):
    """Collect the neighbours of every high-information word in *text*.

    Returns a list of ``(before, word, after)`` tuples, one per occurrence
    of a token whose lower-cased form is in *high_information_words*.
    ``before`` / ``after`` are ``None`` at the start / end of the token list.
    """
    words = nltk.word_tokenize(text)
    # Build the lookup set once instead of scanning a list for every token.
    targets = set(high_information_words)
    last_index = len(words) - 1

    context_words = []
    for index, word in enumerate(words):
        if word.lower() not in targets:
            continue
        before_word = words[index - 1] if index > 0 else None
        after_word = words[index + 1] if index < last_index else None
        context_words.append((before_word, word, after_word))
    return context_words


def create_context_graph(context_words):
    """Build a ``Digraph`` linking each high-information word to its context.

    For every ``(before, word, after)`` tuple the word is drawn as an
    ellipse, the preceding token (if any) as a box pointing to it, and the
    following token (if any) as a diamond it points to.
    """
    graph = Digraph()
    for index, (before_word, high_info_word, after_word) in enumerate(context_words):
        before_id = f'before{index}'
        high_id = f'high{index}'
        after_id = f'after{index}'

        # Nodes first, then edges, so the emitted graph source keeps its order.
        if before_word:
            graph.node(before_id, before_word, shape='box')
        graph.node(high_id, high_info_word, shape='ellipse')
        if after_word:
            graph.node(after_id, after_word, shape='diamond')

        if before_word:
            graph.edge(before_id, high_id)
        if after_word:
            graph.edge(high_id, after_id)
    return graph


def display_context_graph(context_words):
    """Render the context graph for *context_words* in the Streamlit page."""
    st.graphviz_chart(create_context_graph(context_words))


def _table_cell(value):
    """Return *value* as markdown table cell text.

    Missing values become an empty cell; ``|`` is escaped so a pipe token
    cannot be read as a column separator.
    """
    if not value:
        return ''
    return value.replace('|', '\\|')


def display_context_table(context_words):
    """Render *context_words* as a three-column markdown table."""
    header = "| Before | High Info Word | After |\n|--------|----------------|-------|\n"
    rows = [
        f"| {_table_cell(before)} | {_table_cell(high)} | {_table_cell(after)} |\n"
        for before, high, after in context_words
    ]
    st.markdown(header + "".join(rows))


def showInnovationOutlines():
    """Render the static "AI App Areas" markdown outline in the page."""
    st.markdown("""
# AI App Areas in Demand and Opportunities for 100x πŸš€

## Creativity + Productivity πŸŽ¨βœ…

| **Area** | **Opportunity** | **Innovation Keywords** |
|---------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------|
| **Content Generation** | Enable consumers to create art, music, videos, or graphics without complex training. | **Bridges creativity and craft**, making imagination a reality. |
| **Content Editing** | Automate editing workflows and introduce AI-native edits. | **Compose, refine, remix** content seamlessly. |
| **Productivity** | Transform tasks into actions, providing leverage on time. | **Executing tasks** and **giving leverage** on time. |

## High Opportunities for 100x 🌟

### Content Generation

| **What We're Looking For** | **Details** | **Emoji** |
|----------------------------------|-----------------------------------------------------------------------------------------------------|-----------|
| **Killing the "blank page problem"** | From text prompts to slide decks, generation products that **create content** from "blank pages". | πŸ“βž‘οΈπŸŒŸ |
| **Making open source models accessible** | Products that **utilize tech** in the browser or app, making open-source models accessible. | πŸ’»πŸŒ |
| **Creating remixable outputs** | Platforms that allow creators to **make work instantly remixable**, enhancing creativity. | πŸ”πŸŽ¨ |

### Content Editing

| **What We're Looking For** | **Details** | **Emoji** |
|----------------------------------|-----------------------------------------------------------------------------------------------|-----------|
| **Owning multi-media workflows** | Workflow products that allow users to **generate, refine, and stitch different content types**. | πŸ–ΌοΈ+🎡 |
| **Enabling in-platform refinement** | AI products that help users **automatically improve** their creations. | βœ¨πŸ”§ |
| **Iterating with intelligent editors** | Products that enable users to **refine existing outputs** without starting from scratch. | πŸ”„βœοΈ |

### Productivity

| **What We're Looking For** | **Details** | **Emoji** |
|------------------------------|-----------------------------------------------------------------------------------------------------|-----------|
| **Agents that act as systems of action** | General and specialized agents that **complete tasks**, like booking restaurants or analyzing data. | πŸ€–πŸ’Ό |
| **Voice-first apps** | AI apps that prioritize **voice input**, making interaction more natural. | πŸ—£οΈπŸ“± |
| **Apps that provide in-flow assistance** | Tools that **minimize context switching** by offering information and actions within workflow. | πŸ”„πŸ› οΈ |

## Companionship + Social πŸ§‘β€πŸ€β€πŸ§‘πŸŽ‰

| **Area** | **Opportunity** | **Innovation Keywords** |
|---------------------|---------------------------------------------------------------------|--------------------------------------------------------------|
| **Companionship** | AI offers an **infinitely patient and engaging friend**. | **Engaging in conversation** about any topic. |
| **Social** | Enhancing interactions and helping **meet new people**. | **Fun interactions** and **enhanced matchmaking**. |

## Personal Growth 🌱

| **Area** | **Opportunity** | **Innovation Keywords** |
|---------------------|---------------------------------------------------------------------|--------------------------------------------------------------|
| **Education** | Personalized learning environments for every consumer. | **Personalized support** at a lower cost. |
| **Personal Finance**| AI-driven financial advice and portfolio management. | **Money on autopilot** and **self-managing assets**. |
| **Wellness** | Judgment-free expert advice for a better future. | **Judgment-free experts** and **personalized wellness plans**.|

This table encapsulates the essence of AI's transformative potential across creativity, productivity, companionship, social engagement, and personal growth. By focusing on these key areas and innovation keywords, we identify the high-impact opportunities where AI can multiply value and redefine experiences.
""")


def load_example_files():
    """Let the user pick and load an example ``.txt`` file.

    Lists the ``.txt`` files in the current working directory (minus a fixed
    set of dependency/requirements files) in a selectbox. Returns the file's
    text when the load button is pressed on this run, otherwise ``None``.
    """
    # Exclude specific files
    excluded_files = {'freeze.txt', 'requirements.txt', 'packages.txt', 'pre-requirements.txt'}

    # List all .txt files excluding the ones in excluded_files; sorted so the
    # selectbox order does not depend on the directory listing order.
    example_files = sorted(
        f for f in os.listdir() if f.endswith('.txt') and f not in excluded_files
    )

    # Check if there are any files to select from
    if not example_files:
        st.write("No suitable example files found.")
        return None

    selected_file = st.selectbox("πŸ“„ Select an example file:", example_files)
    if st.button(f"πŸ“‚ Load {selected_file}"):
        with open(selected_file, 'r', encoding="utf-8") as file:
            return file.read()
    return None


# Load example files
def load_example_files_old():
    """Legacy example loader, kept unchanged; the UI code below does not call it.

    Unlike ``load_example_files`` it offers every ``.txt`` file in the
    current working directory and has no handling for an empty file list.
    """
    example_files = [f for f in os.listdir() if f.endswith('.txt')]
    selected_file = st.selectbox("πŸ“„ Select an example file:", example_files)
    if st.button(f"πŸ“‚ Load {selected_file}"):
        with open(selected_file, 'r', encoding="utf-8") as file:
            return file.read()
    return None


# Main code for UI
uploaded_file = st.file_uploader("πŸ“ Choose a .txt file", type=['txt'])
example_text = load_example_files()

# An example loaded on this run takes precedence over an uploaded file.
if example_text:
    file_text = example_text
elif uploaded_file is not None:
    try:
        # getvalue() returns the full content regardless of the stream position.
        file_text = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        file_text = ""
else:
    file_text = ""

if file_text:
    text_without_timestamps = remove_timestamps(file_text)

    top_words = extract_high_information_words(text_without_timestamps, 10)
    with st.expander("πŸ“Š Top 10 High Information Words"):
        st.write(top_words)

    with st.expander("πŸ“ˆ Relationship Graph"):
        display_relationship_graph(top_words)

    context_words = extract_context_words(text_without_timestamps, top_words)

    with st.expander("πŸ”— Context Graph"):
        display_context_graph(context_words)

    with st.expander("πŸ“‘ Context Table"):
        display_context_table(context_words)

with st.expander("Innovation Outlines"):
    showInnovationOutlines()