import streamlit as st
import torch
import torch.nn as nn
import json
import math

# Configure the page for a wide layout.
st.set_page_config(page_title="Code Conversion Tool", layout="wide")

# Inject custom CSS for a modern, centered card design with a gradient background.
st.markdown(
    """
    <style>
    /* Set a subtle gradient background for the page */
    body {
        background: linear-gradient(135deg, #ece9e6, #ffffff);
        font-family: 'Helvetica Neue', sans-serif;
    }
    /* Center container for the main app */
    .main-container {
        max-width: 800px;
        margin: 3rem auto;
        padding: 1rem;
    }
    /* Card style for a clean content box */
    .card {
        background: #ffffff;
        border-radius: 10px;
        box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
        padding: 2rem;
    }
    /* Center headings and remove underline */
    h1, h2, h3 {
        text-align: center;
        text-decoration: none;
    }
    /* Style for the translation button */
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border: none;
        padding: 0.5rem 1.5rem;
        border-radius: 5px;
        font-size: 1rem;
        cursor: pointer;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Wrap the app content in the centered card defined by the CSS above.
with st.container():
    # Open the .main-container and .card divs; they are closed at the end of the script.
    st.markdown('<div class="main-container"><div class="card">', unsafe_allow_html=True)
    st.title("Code Conversion Tool")

    # Load the shared token-to-id vocabulary.
    with open("vocabulary.json", "r", encoding="utf-8") as f:
        vocab = json.load(f)
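    # Added sanity check (assumes vocabulary.json maps token strings to integer
    # ids): fail fast if any special token used during decoding is missing.
    for special in ("<unk>", "<start>", "<end>"):
        if special not in vocab:
            st.error(f"vocabulary.json is missing the required token {special!r}")
            st.stop()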

    # Define separate configuration classes
    class PseudoToCppConfig:
        # Config for the Pseudocode → C++ model
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 4
        num_layers = 3
        feedforward_dim = 512
        dropout = 0.2
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    class CppToPseudoConfig:
        # Config for the C++ → Pseudocode model
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 8
        num_layers = 2
        feedforward_dim = 512
        dropout = 0.1
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
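    # NOTE: these hyperparameters are assumed to match the ones used when the
    # checkpoints below were trained; a differently-shaped model will fail to
    # load the saved state dict.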

    # Positional Encoding
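    # Standard sinusoidal encoding (Vaswani et al., 2017): even embedding
    # dimensions carry sin(pos / 10000^(2i/d)), odd dimensions the cosine.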
    class PositionalEncoding(nn.Module):
        def __init__(self, embed_dim, max_len=100):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len, embed_dim)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, embed_dim, 2).float() * (-math.log(10000.0) / embed_dim))
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            self.pe = pe.unsqueeze(0)

        def forward(self, x):
            return x + self.pe[:, :x.size(1)].to(x.device)

    # Transformer Model
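    # Shape contract (assumed): src and tgt are (batch, seq) LongTensors of
    # token ids; forward returns (batch, tgt_seq, vocab_size) logits.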
    class Seq2SeqTransformer(nn.Module):
        def __init__(self, config):
            super(Seq2SeqTransformer, self).__init__()
            self.config = config
            self.embedding = nn.Embedding(config.vocab_size, config.embed_dim)
            self.positional_encoding = PositionalEncoding(config.embed_dim, config.max_length)
            self.transformer = nn.Transformer(
                d_model=config.embed_dim,
                nhead=config.num_heads,
                num_encoder_layers=config.num_layers,
                num_decoder_layers=config.num_layers,
                dim_feedforward=config.feedforward_dim,
                dropout=config.dropout
            )
            self.fc_out = nn.Linear(config.embed_dim, config.vocab_size)

        def forward(self, src, tgt):
            # Scale embeddings by sqrt(d_model), as in the original Transformer
            # paper, before adding positional information.
            src_emb = self.embedding(src) * math.sqrt(self.config.embed_dim)
            tgt_emb = self.embedding(tgt) * math.sqrt(self.config.embed_dim)
            src_emb = self.positional_encoding(src_emb)
            tgt_emb = self.positional_encoding(tgt_emb)
            # Causal mask so decoder positions cannot attend to future tokens.
            tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
            # nn.Transformer defaults to (seq, batch, embed), hence the permutes.
            out = self.transformer(src_emb.permute(1, 0, 2), tgt_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
            out = self.fc_out(out.permute(1, 0, 2))
            return out

    # Load a model checkpoint with the appropriate configuration. The leading
    # underscore tells st.cache_resource not to try to hash the config class;
    # the cache is keyed on the checkpoint path instead.
    @st.cache_resource
    def load_model(path, _config):
        model = Seq2SeqTransformer(_config).to(_config.device)
        model.load_state_dict(torch.load(path, map_location=_config.device))
        model.eval()
        return model

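    # The two .pth checkpoints are assumed to sit next to this script; adjust
    # the paths if they live elsewhere.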
    cpp_to_pseudo_model = load_model("cpp_to_pseudo_epoch_1.pth", CppToPseudoConfig)
    pseudo_to_cpp_model = load_model("transformer_epoch_1.pth", PseudoToCppConfig)

    # Greedy autoregressive decoding: repeatedly feed the partial output back
    # through the decoder and take the most likely next token until <end>.
    def translate(model, input_tokens, vocab, device, max_length=50):
        model.eval()
        input_ids = [vocab.get(token, vocab["<unk>"]) for token in input_tokens]
        # Clamp the source to the positional-encoding window the model was built with.
        input_ids = input_ids[: model.config.max_length]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)
        output_ids = [vocab["<start>"]]
        for _ in range(max_length):
            output_tensor = torch.tensor(output_ids, dtype=torch.long).unsqueeze(0).to(device)
            with torch.no_grad():
                predictions = model(input_tensor, output_tensor)
            next_token_id = predictions.argmax(dim=-1)[:, -1].item()
            output_ids.append(next_token_id)
            if next_token_id == vocab["<end>"]:
                break
        # Drop the trailing <end> marker if generation stopped on it.
        if output_ids[-1] == vocab["<end>"]:
            output_ids = output_ids[:-1]
        id_to_token = {idx: token for token, idx in vocab.items()}
        return " ".join(id_to_token.get(idx, "<unk>") for idx in output_ids[1:])

    # UI Elements for Translation
    mode = st.radio("Select Translation Mode", ("C++ → Pseudocode", "Pseudocode → C++"))
    user_input = st.text_area("Enter code:")

    if st.button("Translate"):
        # Whitespace tokenization, mirroring how the training data is assumed
        # to have been tokenized.
        tokens = user_input.strip().split()
        if not tokens:
            st.warning("Please enter some code to translate.")
        else:
            if mode == "C++ → Pseudocode":
                translated_code = translate(cpp_to_pseudo_model, tokens, vocab, CppToPseudoConfig.device)
            else:
                translated_code = translate(pseudo_to_cpp_model, tokens, vocab, PseudoToCppConfig.device)
            st.subheader("Generated Translation:")
            # Pseudocode has no dedicated lexer, so render it as plain text.
            st.code(translated_code, language="cpp" if mode == "Pseudocode → C++" else "text")

    # Close the .card and .main-container divs opened at the top.
    st.markdown('</div></div>', unsafe_allow_html=True)