import streamlit as st
import numpy as np
import torch
from torch import nn
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Page title
st.title('Advanced Deep Learning for NLP')

# Word Embeddings: Word2Vec, GloVe, FastText
st.header('1. Word Embeddings')
st.subheader('Definition:')
st.write("""
Word embeddings are dense vector representations of words, where words with similar meanings have similar vector representations. They allow machine learning models to work with text data more efficiently.

- **Word2Vec (Skip-gram, CBOW)**: A model that learns word representations based on the context of words.
    - **Skip-gram**: Predicts surrounding words from a given word.
    - **CBOW (Continuous Bag of Words)**: Predicts the target word from surrounding context words.
- **GloVe (Global Vectors for Word Representation)**: A count-based model that learns word vectors by leveraging the global word-word co-occurrence statistics in a corpus.
- **FastText**: Similar to Word2Vec but also takes subword information into account, helping with out-of-vocabulary words.
""")

# Example: Word2Vec
st.subheader('Word2Vec Example:')
sentence = st.text_area("Enter a sentence to visualize Word2Vec embeddings", "NLP is amazing and very useful.")

if st.button('Visualize Word2Vec'):
    # Simulate Word2Vec output by showing random vectors in place of trained embeddings
    words = sentence.split()
    embeddings = {word: np.random.rand(1, 50) for word in words}  # Fake embeddings for demonstration
    st.write("Word2Vec Embeddings (Random Example):")
    for word, emb in embeddings.items():
        st.write(f"{word}: {emb.flatten()[:5]}...")  # Display the first 5 values of each fake embedding

# Sequence Models: RNNs, LSTMs, GRUs
st.header('2. Sequence Models')
st.subheader('Definition:')
st.write("""
Sequence models are used to process sequential data (like sentences), where the order of the data matters. They are key in NLP tasks like translation, summarization, and sentiment analysis.

- **RNNs (Recurrent Neural Networks)**: RNNs process sequences by using a loop to pass information from one step to the next. They are simple but struggle with long-term dependencies.
- **LSTMs (Long Short-Term Memory)**: LSTMs are a type of RNN designed to handle long-range dependencies by using gating mechanisms to control the flow of information.
- **GRUs (Gated Recurrent Units)**: GRUs are similar to LSTMs but with a simplified architecture that still helps capture long-range dependencies effectively.
""")

# Example: RNN with PyTorch (simple architecture for demonstration)
st.subheader('RNN Example (PyTorch):')
if st.button('Show RNN Model Architecture'):
    # Basic RNN model using PyTorch
    class SimpleRNN(nn.Module):
        def __init__(self, input_size, hidden_size, output_size):
            super(SimpleRNN, self).__init__()
            self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
            self.fc = nn.Linear(hidden_size, output_size)

        def forward(self, x):
            out, _ = self.rnn(x)
            out = self.fc(out[:, -1, :])  # Use the last output of the sequence
            return out

    rnn_model = SimpleRNN(input_size=10, hidden_size=20, output_size=1)
    st.write("RNN Architecture:")
    st.write(rnn_model)
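# NOTE: The section above describes LSTMs and GRUs but only demonstrates a plain RNN.
# The sketch below mirrors the RNN example with an LSTM layer so the gated variant can be
# inspected side by side. It reuses the `torch.nn` and `streamlit` imports from the top of
# this script; the SimpleLSTM class name, button label, and hyperparameters
# (input_size=10, hidden_size=20) are illustrative choices, not values from the original app.
st.subheader('LSTM Example (PyTorch):')
if st.button('Show LSTM Model Architecture'):
    # LSTM variant of SimpleRNN: nn.LSTM returns (output, (hidden_state, cell_state))
    class SimpleLSTM(nn.Module):
        def __init__(self, input_size, hidden_size, output_size):
            super().__init__()
            self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
            self.fc = nn.Linear(hidden_size, output_size)

        def forward(self, x):
            out, (h_n, c_n) = self.lstm(x)
            out = self.fc(out[:, -1, :])  # Use the last time step, as in the RNN example
            return out

    lstm_model = SimpleLSTM(input_size=10, hidden_size=20, output_size=1)
    st.write("LSTM Architecture:")
    st.write(lstm_model)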
# Attention Mechanisms: Self-attention, Seq2Seq, Transformer
st.header('3. Attention Mechanisms')
st.subheader('Definition:')
st.write("""
Attention mechanisms allow models to focus on specific parts of an input sequence when making predictions, which improves their performance on tasks that require understanding long-range dependencies.

- **Self-attention**: A mechanism where a model looks at different parts of the sequence (or the entire sequence) to compute a weighted representation for each element.
- **Seq2Seq Models**: Sequence-to-sequence models use an encoder-decoder architecture, where the encoder processes the input sequence and the decoder generates the output sequence.
- **Transformer**: The Transformer model relies on self-attention to capture relationships between all words in a sentence, making it very efficient for NLP tasks like translation, summarization, and language generation.
""")

# Example: Transformer architecture (simplified explanation)
st.subheader('Transformer Example (Simplified):')
if st.button('Show Transformer Architecture'):
    # MultiHeadAttention takes separate query and value inputs, so it cannot sit inside a
    # plain Sequential model; the Keras functional API is used instead.
    inputs = keras.Input(shape=(None, 512))  # Variable-length sequences of 512-dim embeddings
    attention_output = layers.MultiHeadAttention(num_heads=8, key_dim=512)(inputs, inputs)  # Self-attention: query = value = inputs
    pooled = layers.GlobalAveragePooling1D()(attention_output)
    hidden = layers.Dense(256, activation="relu")(pooled)
    outputs = layers.Dense(1)(hidden)
    transformer_model = keras.Model(inputs, outputs)

    st.write("Transformer Architecture (Simplified):")
    st.write(transformer_model)

# Explanation of key attention components
st.header('4. Attention Components')

st.subheader('Self-attention:')
st.write("""
In self-attention, each word in a sequence attends to all other words and assigns a weight (importance) to each. This mechanism allows models to capture long-range dependencies between words in a sentence.
""")

st.subheader('Seq2Seq:')
st.write("""
Seq2Seq models are typically used for tasks like translation, where the model encodes an input sequence (like a sentence) and decodes it to produce an output sequence.
""")

st.subheader('Transformer:')
st.write("""
The Transformer model revolutionized NLP by using self-attention in both the encoder and decoder. It processes all tokens in parallel, making it highly efficient and suitable for large datasets.
""")
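# NOTE: Section 4 above describes self-attention in prose only. The sketch below computes
# scaled dot-product attention, softmax(QK^T / sqrt(d_k)) V, on small random matrices using
# the NumPy import from the top of this script. The matrix sizes (4 tokens, d_k = 8), the
# random seed, and the button label are illustrative choices, not part of the original app.
st.subheader('Scaled Dot-Product Attention Demo:')
if st.button('Compute Attention Weights'):
    rng = np.random.default_rng(0)
    num_tokens, d_k = 4, 8
    Q = rng.standard_normal((num_tokens, d_k))  # Queries
    K = rng.standard_normal((num_tokens, d_k))  # Keys
    V = rng.standard_normal((num_tokens, d_k))  # Values

    scores = Q @ K.T / np.sqrt(d_k)  # Similarity of each query to each key, scaled by sqrt(d_k)
    exp_scores = np.exp(scores - scores.max(axis=-1, keepdims=True))  # Numerically stable softmax
    weights = exp_scores / exp_scores.sum(axis=-1, keepdims=True)     # Each row sums to 1
    output = weights @ V  # Weighted sum of value vectors

    st.write("Attention weights (one row per token, rows sum to 1):")
    st.write(np.round(weights, 3))
    st.write("Attended output (one vector per token):")
    st.write(np.round(output, 3))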