import streamlit as st
import torch
import torch.nn as nn
import json
import math

# Configure the page for a wide layout.
st.set_page_config(page_title="Code Conversion Tool", layout="wide")

# Inject custom CSS for a modern, centered card design with a gradient background.
st.markdown(
    """
    <style>
    /* Set a subtle gradient background for the page */
    body {
        background: linear-gradient(135deg, #ece9e6, #ffffff);
        font-family: 'Helvetica Neue', sans-serif;
    }
    /* Center container for the main app */
    .main-container {
        max-width: 800px;
        margin: 3rem auto;
        padding: 1rem;
    }
    /* Card style for a clean content box */
    .card {
        background: #ffffff;
        border-radius: 10px;
        box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
        padding: 2rem;
    }
    /* Center headings and remove underline */
    h1, h2, h3 {
        text-align: center;
        text-decoration: none;
    }
    /* Style for the translation button */
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border: none;
        padding: 0.5rem 1.5rem;
        border-radius: 5px;
        font-size: 1rem;
        cursor: pointer;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Wrap the app content in the centered card defined by the CSS above.
with st.container():
    # Open the .main-container and .card divs; they are closed at the end of the script.
    st.markdown('<div class="main-container"><div class="card">', unsafe_allow_html=True)
    st.title("Code Conversion Tool")

    # Load the shared token-to-id vocabulary.
    with open("vocabulary.json", "r", encoding="utf-8") as f:
        vocab = json.load(f)
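    # Added sanity check (assumes vocabulary.json maps token strings to integer
    # ids): fail fast if any special token used during decoding is missing.
    for special in ("<unk>", "<start>", "<end>"):
        if special not in vocab:
            st.error(f"vocabulary.json is missing the required token {special!r}")
            st.stop()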

    # Define separate configuration classes
    class PseudoToCppConfig:
        # Config for the Pseudocode → C++ model
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 4
        num_layers = 3
        feedforward_dim = 512
        dropout = 0.2
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    class CppToPseudoConfig:
        # Config for the C++ → Pseudocode model
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 8
        num_layers = 2
        feedforward_dim = 512
        dropout = 0.1
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
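    # NOTE: these hyperparameters are assumed to match the ones used when the
    # checkpoints below were trained; a differently-shaped model will fail to
    # load the saved state dict.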

    # Positional Encoding
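    # Standard sinusoidal encoding (Vaswani et al., 2017): even embedding
    # dimensions carry sin(pos / 10000^(2i/d)), odd dimensions the cosine.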
    class PositionalEncoding(nn.Module):
        def __init__(self, embed_dim, max_len=100):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len, embed_dim)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, embed_dim, 2).float() * (-math.log(10000.0) / embed_dim))
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            self.pe = pe.unsqueeze(0)

        def forward(self, x):
            return x + self.pe[:, :x.size(1)].to(x.device)

    # Transformer Model
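    # Shape contract (assumed): src and tgt are (batch, seq) LongTensors of
    # token ids; forward returns (batch, tgt_seq, vocab_size) logits.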
    class Seq2SeqTransformer(nn.Module):
        def __init__(self, config):
            super(Seq2SeqTransformer, self).__init__()
            self.config = config
            self.embedding = nn.Embedding(config.vocab_size, config.embed_dim)
            self.positional_encoding = PositionalEncoding(config.embed_dim, config.max_length)
            self.transformer = nn.Transformer(
                d_model=config.embed_dim,
                nhead=config.num_heads,
                num_encoder_layers=config.num_layers,
                num_decoder_layers=config.num_layers,
                dim_feedforward=config.feedforward_dim,
                dropout=config.dropout
            )
            self.fc_out = nn.Linear(config.embed_dim, config.vocab_size)

        def forward(self, src, tgt):
            # Scale embeddings by sqrt(d_model), as in the original Transformer
            # paper, before adding positional information.
            src_emb = self.embedding(src) * math.sqrt(self.config.embed_dim)
            tgt_emb = self.embedding(tgt) * math.sqrt(self.config.embed_dim)
            src_emb = self.positional_encoding(src_emb)
            tgt_emb = self.positional_encoding(tgt_emb)
            # Causal mask so decoder positions cannot attend to future tokens.
            tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)
            # nn.Transformer defaults to (seq, batch, embed), hence the permutes.
            out = self.transformer(src_emb.permute(1, 0, 2), tgt_emb.permute(1, 0, 2), tgt_mask=tgt_mask)
            out = self.fc_out(out.permute(1, 0, 2))
            return out

    # Load a model checkpoint with the appropriate configuration. The leading
    # underscore tells st.cache_resource not to try to hash the config class;
    # the cache is keyed on the checkpoint path instead.
    @st.cache_resource
    def load_model(path, _config):
        model = Seq2SeqTransformer(_config).to(_config.device)
        model.load_state_dict(torch.load(path, map_location=_config.device))
        model.eval()
        return model

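    # The two .pth checkpoints are assumed to sit next to this script; adjust
    # the paths if they live elsewhere.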
    cpp_to_pseudo_model = load_model("cpp_to_pseudo_epoch_1.pth", CppToPseudoConfig)
    pseudo_to_cpp_model = load_model("transformer_epoch_1.pth", PseudoToCppConfig)

    # Greedy autoregressive decoding: repeatedly feed the partial output back
    # through the decoder and take the most likely next token until <end>.
    def translate(model, input_tokens, vocab, device, max_length=50):
        model.eval()
        input_ids = [vocab.get(token, vocab["<unk>"]) for token in input_tokens]
        # Clamp the source to the positional-encoding window the model was built with.
        input_ids = input_ids[: model.config.max_length]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)
        output_ids = [vocab["<start>"]]
        for _ in range(max_length):
            output_tensor = torch.tensor(output_ids, dtype=torch.long).unsqueeze(0).to(device)
            with torch.no_grad():
                predictions = model(input_tensor, output_tensor)
            next_token_id = predictions.argmax(dim=-1)[:, -1].item()
            output_ids.append(next_token_id)
            if next_token_id == vocab["<end>"]:
                break
        # Drop the trailing <end> marker if generation stopped on it.
        if output_ids[-1] == vocab["<end>"]:
            output_ids = output_ids[:-1]
        id_to_token = {idx: token for token, idx in vocab.items()}
        return " ".join(id_to_token.get(idx, "<unk>") for idx in output_ids[1:])

    # UI Elements for Translation
    mode = st.radio("Select Translation Mode", ("C++ → Pseudocode", "Pseudocode → C++"))
    user_input = st.text_area("Enter code:")

    if st.button("Translate"):
        # Whitespace tokenization, mirroring how the training data is assumed
        # to have been tokenized.
        tokens = user_input.strip().split()
        if not tokens:
            st.warning("Please enter some code to translate.")
        else:
            if mode == "C++ → Pseudocode":
                translated_code = translate(cpp_to_pseudo_model, tokens, vocab, CppToPseudoConfig.device)
            else:
                translated_code = translate(pseudo_to_cpp_model, tokens, vocab, PseudoToCppConfig.device)
            st.subheader("Generated Translation:")
            # Pseudocode has no dedicated lexer, so render it as plain text.
            st.code(translated_code, language="cpp" if mode == "Pseudocode → C++" else "text")

    # Close the .card and .main-container divs opened at the top.
    st.markdown('</div></div>', unsafe_allow_html=True)