# Import necessary libraries
import streamlit as st
import re
import nltk
import os
from nltk.corpus import stopwords
from nltk import FreqDist
from graphviz import Digraph

# Set page configuration with a title and favicon
st.set_page_config(
    page_title="πŸ“ΊTranscriptπŸ“œEDAπŸ”NLTK",
    page_icon="🌠",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
        'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558"
    }
)

st.markdown('''
- πŸ” **Exploratory Data Analysis (EDA)** πŸ“Š: Dive deep into the sea of data with our EDA feature, unveiling hidden patterns πŸ•΅οΈβ€β™‚οΈ and insights 🧠 in your transcripts. Transform raw data into a treasure trove of information πŸ†.
- πŸ“œ **Natural Language Toolkit (NLTK)** πŸ› οΈ: Harness the power of NLTK to process and understand human language πŸ—£οΈ. From tokenization to sentiment analysis, our toolkit is your compass 🧭 in the vast landscape of natural language processing (NLP).
- πŸ“Ί **Transcript Analysis** πŸ“ˆ: Elevate your text analysis with our advanced transcript analysis tools. Whether it's speech recognition πŸŽ™οΈ or thematic extraction 🌐, turn your audiovisual content into actionable insights πŸ”‘.
''')

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

def remove_timestamps(text):
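    # Remove MM:SS timestamp lines (together with the line that follows each one)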
    return re.sub(r'\d{1,2}:\d{2}\n.*\n', '', text)

def extract_high_information_words(text, top_n=10):
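    # Tokenize, keep alphabetic tokens, drop English stopwords, and return the top_n most frequent words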
    words = nltk.word_tokenize(text)
    words = [word.lower() for word in words if word.isalpha()]
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]
    freq_dist = FreqDist(filtered_words)
    return [word for word, _ in freq_dist.most_common(top_n)]

def create_relationship_graph(words):
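    # Chain the words in order: one node per word, an edge from each word to the next, labeled by position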
    graph = Digraph()
    for index, word in enumerate(words):
        graph.node(str(index), word)
        if index > 0:
            graph.edge(str(index - 1), str(index), label=str(index))
    return graph

def display_relationship_graph(words):
    graph = create_relationship_graph(words)
    st.graphviz_chart(graph)

def extract_context_words(text, high_information_words):
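    # For each occurrence of a high-information word, record the tokens immediately before and after it (None at boundaries)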
    words = nltk.word_tokenize(text)
    context_words = []
    for index, word in enumerate(words):
        if word.lower() in high_information_words:
            before_word = words[index - 1] if index > 0 else None
            after_word = words[index + 1] if index < len(words) - 1 else None
            context_words.append((before_word, word, after_word))
    return context_words

def create_context_graph(context_words):
    # Link each before-word (box) and after-word (diamond) to its high-information word (ellipse)
    graph = Digraph()
    for index, (before_word, high_info_word, after_word) in enumerate(context_words):
        if before_word:
            graph.node(f'before{index}', before_word, shape='box')
        graph.node(f'high{index}', high_info_word, shape='ellipse')
        if after_word:
            graph.node(f'after{index}', after_word, shape='diamond')
        if before_word:
            graph.edge(f'before{index}', f'high{index}')
        if after_word:
            graph.edge(f'high{index}', f'after{index}')
    return graph

def display_context_graph(context_words):
    graph = create_context_graph(context_words)
    st.graphviz_chart(graph)

def display_context_table(context_words):
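    # Render the (before, word, after) context triples as a Markdown table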
    table = "| Before | High Info Word | After |\n|--------|----------------|-------|\n"
    for before, high, after in context_words:
        table += f"| {before if before else ''} | {high} | {after if after else ''} |\n"
    st.markdown(table)

def showInnovationOutlines():
    st.markdown("""
    
# AI App Areas in Demand and Opportunities for 100x πŸš€

## Creativity + Productivity πŸŽ¨βœ…

| **Area**            | **Opportunity**                                                                 | **Innovation Keywords**                                             |
|---------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------|
| **Content Generation** | Enable consumers to create art, music, videos, or graphics without complex training. | **Bridges creativity and craft**, making imagination a reality.    |
| **Content Editing**    | Automate editing workflows and introduce AI-native edits.                        | **Compose, refine, remix** content seamlessly.                      |
| **Productivity**       | Transform tasks into actions, providing leverage on time.                        | **Executing tasks** and **giving leverage** on time.                |

## High Opportunities for 100x 🌟

### Content Generation

| **What We're Looking For**       | **Details**                                                                                         | **Emoji** |
|----------------------------------|-----------------------------------------------------------------------------------------------------|-----------|
| **Killing the "blank page problem"** | From text prompts to slide decks, generation products that **create content** from "blank pages".  | πŸ“βž‘οΈπŸŒŸ    |
| **Making open source models accessible** | Products that **utilize tech** in the browser or app, making open-source models accessible.         | πŸ’»πŸŒ     |
| **Creating remixable outputs**    | Platforms that allow creators to **make work instantly remixable**, enhancing creativity.           | πŸ”πŸŽ¨     |

### Content Editing

| **What We're Looking For**       | **Details**                                                                                   | **Emoji** |
|----------------------------------|-----------------------------------------------------------------------------------------------|-----------|
| **Owning multi-media workflows** | Workflow products that allow users to **generate, refine, and stitch different content types**. | πŸ–ΌοΈ+🎡    |
| **Enabling in-platform refinement** | AI products that help users **automatically improve** their creations.                        | βœ¨πŸ”§     |
| **Iterating with intelligent editors** | Products that enable users to **refine existing outputs** without starting from scratch.       | πŸ”„βœοΈ    |

### Productivity

| **What We're Looking For**   | **Details**                                                                                         | **Emoji** |
|------------------------------|-----------------------------------------------------------------------------------------------------|-----------|
| **Agents that act as systems of action** | General and specialized agents that **complete tasks**, like booking restaurants or analyzing data. | πŸ€–πŸ’Ό     |
| **Voice-first apps**         | AI apps that prioritize **voice input**, making interaction more natural.                           | πŸ—£οΈπŸ“±    |
| **Apps that provide in-flow assistance** | Tools that **minimize context switching** by offering information and actions within workflow.      | πŸ”„πŸ› οΈ    |

## Companionship + Social πŸ§‘β€πŸ€β€πŸ§‘πŸŽ‰

| **Area**            | **Opportunity**                                                     | **Innovation Keywords**                                      |
|---------------------|---------------------------------------------------------------------|--------------------------------------------------------------|
| **Companionship**   | AI offers an **infinitely patient and engaging friend**.            | **Engaging in conversation** about any topic.                 |
| **Social**          | Enhancing interactions and helping **meet new people**.             | **Fun interactions** and **enhanced matchmaking**.            |

## Personal Growth 🌱

| **Area**            | **Opportunity**                                                     | **Innovation Keywords**                                      |
|---------------------|---------------------------------------------------------------------|--------------------------------------------------------------|
| **Education**       | Personalized learning environments for every consumer.              | **Personalized support** at a lower cost.                     |
| **Personal Finance**| AI-driven financial advice and portfolio management.                | **Money on autopilot** and **self-managing assets**.          |
| **Wellness**        | Judgment-free expert advice for a better future.                    | **Judgment-free experts** and **personalized wellness plans**.|

These tables encapsulate the essence of AI's transformative potential across creativity, productivity, companionship, social engagement, and personal growth. By focusing on these key areas and innovation keywords, we identify the high-impact opportunities where AI can multiply value and redefine experiences.

    
    """)

def load_example_files():
    # Exclude specific files
    excluded_files = {'freeze.txt', 'requirements.txt', 'packages.txt', 'pre-requirements.txt'}
    
    # List all .txt files excluding the ones in excluded_files
    example_files = [f for f in os.listdir() if f.endswith('.txt') and f not in excluded_files]
    
    # Check if there are any files to select from
    if example_files:
        selected_file = st.selectbox("πŸ“„ Select an example file:", example_files)
        if st.button(f"πŸ“‚ Load {selected_file}"):
            with open(selected_file, 'r', encoding="utf-8") as file:
                return file.read()
    else:
        st.write("No suitable example files found.")
    
    return None
    
# Previous version of the example-file loader, kept for reference (not called below)
def load_example_files_old():
    example_files = [f for f in os.listdir() if f.endswith('.txt')]
    selected_file = st.selectbox("πŸ“„ Select an example file:", example_files)
    if st.button(f"πŸ“‚ Load {selected_file}"):
        with open(selected_file, 'r', encoding="utf-8") as file:
            return file.read()
    return None

# Main code for UI
uploaded_file = st.file_uploader("πŸ“ Choose a .txt file", type=['txt'])

example_text = load_example_files()
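# A loaded example file takes precedence over an uploaded file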

if example_text:
    file_text = example_text
elif uploaded_file:
    file_text = uploaded_file.read().decode("utf-8")
else:
    file_text = ""

if file_text:
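    # EDA pipeline: strip timestamps, extract the top 10 words, then show the graphs and context table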
    text_without_timestamps = remove_timestamps(file_text)
    top_words = extract_high_information_words(text_without_timestamps, 10)

    with st.expander("πŸ“Š Top 10 High Information Words"):
        st.write(top_words)

    with st.expander("πŸ“ˆ Relationship Graph"):
        display_relationship_graph(top_words)

    context_words = extract_context_words(text_without_timestamps, top_words)

    with st.expander("πŸ”— Context Graph"):
        display_context_graph(context_words)

    with st.expander("πŸ“‘ Context Table"):
        display_context_table(context_words)

    with st.expander("Innovation Outlines"):
        showInnovationOutlines()