{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import import_ipynb\n", "from Encoder import Encoder, EncoderBlock, MultiHeadAttentionBlock, FeedForwardBlock, InputEmbeddingsLayer, PositionalEncodingLayer\n", "from Decoder import Decoder, DecoderBlock, MultiHeadAttentionBlock, FeedForwardBlock, InputEmbeddingsLayer, PositionalEncodingLayer\n", "\n", "import torch\n", "import torch.nn as nn \n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "class LinearLayer(nn.Module):\n", "\n", " def __init__(self, d_model: int, vocab_size: int) -> None:\n", " super().__init__()\n", " self.Linear = nn.Linear(d_model, vocab_size)\n", "\n", " def forward(self, x):\n", " return self.Linear(x)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "class TransformerBlock(nn.Module):\n", "\n", " def __init__(self, encoder: Encoder, decoder: Decoder, source_embedding: InputEmbeddingsLayer, target_embedding: InputEmbeddingsLayer, source_position: PositionalEncodingLayer, target_position: PositionalEncodingLayer, Linear: LinearLayer) -> None:\n", " super().__init__()\n", " self.encoder = encoder \n", " self.decoder = decoder \n", " self.source_embedding = source_embedding\n", " self.target_embedding = target_embedding\n", " self.source_position = source_position\n", " self.target_position = target_position\n", " self.Linear = Linear\n", "\n", " def encode(self, source_language, source_mask):\n", " source_language = self.source_embedding(source_language)\n", " source_language = self.source_position(source_language)\n", " return self.encoder(source_language, source_mask)\n", "\n", " def decode(self, Encoder_output, source_mask, target_language, target_mask):\n", " target_language = self.target_embedding(target_language)\n", " target_language = self.target_position(target_language)\n", " return self.decoder(target_language, Encoder_output, source_mask, target_mask)\n", "\n", " def linear(self, x):\n", " return self.Linear(x)\n", " \n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def Transformer_Model(source_vocab_size: int, target_vocab_size: int, source_sequence_length: int, target_sequence_length: int, d_model: int = 512, Layers: int = 6, heads: int = 8, dropout: float = 0.1, d_ff: int = 2048)->TransformerBlock:\n", "\n", " source_embedding = InputEmbeddingsLayer(d_model, source_vocab_size)\n", " target_embedding = InputEmbeddingsLayer(d_model, target_vocab_size)\n", "\n", " source_position = PositionalEncodingLayer(d_model, source_sequence_length, dropout)\n", " target_position = PositionalEncodingLayer(d_model, target_sequence_length, dropout)\n", "\n", " EncoderBlocks = []\n", " for _ in range(Layers):\n", " encoder_self_attention_block = MultiHeadAttentionBlock(d_model, heads, dropout)\n", " encoder_feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)\n", " encoder_block = EncoderBlock(encoder_self_attention_block, encoder_feed_forward_block, dropout)\n", " EncoderBlocks.append(encoder_block)\n", "\n", " DecoderBlocks = []\n", " for _ in range(Layers):\n", " decoder_self_attention_block = MultiHeadAttentionBlock(d_model, heads, dropout)\n", " decoder_cross_attention_block = MultiHeadAttentionBlock(d_model, heads, dropout)\n", " decoder_feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)\n", " decoder_block = DecoderBlock(decoder_self_attention_block, decoder_cross_attention_block, decoder_feed_forward_block, dropout)\n", " DecoderBlocks.append(decoder_block)\n", "\n", " encoder = Encoder(nn.ModuleList(EncoderBlocks))\n", " decoder = Decoder(nn.ModuleList(DecoderBlocks))\n", "\n", " linear = LinearLayer(d_model, target_vocab_size)\n", "\n", " Transformer = TransformerBlock(encoder, decoder, source_embedding, target_embedding, source_position, target_position, linear)\n", " \n", " for t in Transformer.parameters():\n", " if t.dim() > 1:\n", " nn.init.xavier_uniform(t)\n", "\n", " return Transformer\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "interpreter": { "hash": "5f594f1fbc6ec12c92a2efee092a20dcfd0697dc036fc348ba81f2fc261c5e29" }, "kernelspec": { "display_name": "Python 3.11.5 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }