import streamlit as st
import torch
import torch.nn as nn
import json
import math
# Configure the page for a wide layout.
st.set_page_config(page_title="Code Conversion Tool", layout="wide")
# Inject custom CSS for a modern, centered card design with a gradient background.
st.markdown(
"""
<style>
/* Set a subtle gradient background for the page */
body {
background: linear-gradient(135deg, #ece9e6, #ffffff);
font-family: 'Helvetica Neue', sans-serif;
}
/* Center container for the main app */
.main-container {
max-width: 800px;
margin: 3rem auto;
padding: 1rem;
}
/* Card style for a clean content box */
.card {
background: #ffffff;
border-radius: 10px;
box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
padding: 2rem;
}
/* Center headings and remove underline */
h1, h2, h3 {
text-align: center;
text-decoration: none;
}
/* Style for the translation button */
.stButton>button {
background-color: #4CAF50;
color: white;
border: none;
padding: 0.5rem 1.5rem;
border-radius: 5px;
font-size: 1rem;
cursor: pointer;
}
.stButton>button:hover {
background-color: #45a049;
}
</style>
""",
unsafe_allow_html=True
)
# Wrap the app content in a centered container.
with st.container():
    # Open the wrapper divs that the CSS above styles; the two closing
    # </div> tags at the bottom of the script match these.
    st.markdown('<div class="main-container"><div class="card">', unsafe_allow_html=True)

    # Change the title here.
    st.title("Code Conversion Tool")

    # Load the vocabulary directly (no sidebar).
    with open("vocabulary.json", "r") as f:
        vocab = json.load(f)
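
    # A sketch of the expected vocabulary.json layout, inferred from the
    # lookups below (the ids shown are illustrative, not the real ones):
    #   {"<start>": 0, "<end>": 1, "<unk>": 2, "if": 3, ...}
    # i.e. a flat token -> integer-id map; it must contain the <start>,
    # <end>, and <unk> entries that translate() relies on.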
    # Define separate configuration classes.
    class PseudoToCppConfig:
        # Config for the Pseudocode → C++ model.
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 4
        num_layers = 3
        feedforward_dim = 512
        dropout = 0.2
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    class CppToPseudoConfig:
        # Config for the C++ → Pseudocode model.
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 8
        num_layers = 2
        feedforward_dim = 512
        dropout = 0.1
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
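
    # Positional encoding: the Transformer itself is order-agnostic, so the
    # standard sinusoidal encoding from "Attention Is All You Need" is added
    # to the embeddings:
    #   PE(pos, 2i)   = sin(pos / 10000^(2i / embed_dim))
    #   PE(pos, 2i+1) = cos(pos / 10000^(2i / embed_dim))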
    class PositionalEncoding(nn.Module):
        def __init__(self, embed_dim, max_len=100):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len, embed_dim)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, embed_dim, 2).float() * (-math.log(10000.0) / embed_dim))
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            # Kept as a plain attribute (not register_buffer) so the saved
            # checkpoints' state_dict keys still match at load time.
            self.pe = pe.unsqueeze(0)

        def forward(self, x):
            return x + self.pe[:, :x.size(1)].to(x.device)
    # Transformer model.
    class Seq2SeqTransformer(nn.Module):
        def __init__(self, config):
            super(Seq2SeqTransformer, self).__init__()
            self.config = config
            self.embedding = nn.Embedding(config.vocab_size, config.embed_dim)
            self.positional_encoding = PositionalEncoding(config.embed_dim, config.max_length)
            self.transformer = nn.Transformer(
                d_model=config.embed_dim,
                nhead=config.num_heads,
                num_encoder_layers=config.num_layers,
                num_decoder_layers=config.num_layers,
                dim_feedforward=config.feedforward_dim,
                dropout=config.dropout
            )
            self.fc_out = nn.Linear(config.embed_dim, config.vocab_size)

        def forward(self, src, tgt):
            src_emb = self.embedding(src) * math.sqrt(self.config.embed_dim)
            tgt_emb = self.embedding(tgt) * math.sqrt(self.config.embed_dim)
            src_emb = self.positional_encoding(src_emb)
            tgt_emb = self.positional_encoding(tgt_emb)
            # nn.Transformer defaults to sequence-first input, so permute
            # (batch, seq, dim) -> (seq, batch, dim) and back afterwards.
            out = self.transformer(src_emb.permute(1, 0, 2), tgt_emb.permute(1, 0, 2))
            out = self.fc_out(out.permute(1, 0, 2))
            return out
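
    # Quick shape sketch (not executed by the app): for src of shape
    # (batch, src_len) and tgt of shape (batch, tgt_len), forward() returns
    # logits of shape (batch, tgt_len, vocab_size).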
    # Load a model checkpoint with the appropriate configuration. The leading
    # underscore on _config tells st.cache_resource not to hash the class
    # object; the checkpoint path alone keys the cache.
    @st.cache_resource
    def load_model(path, _config):
        model = Seq2SeqTransformer(_config).to(_config.device)
        model.load_state_dict(torch.load(path, map_location=_config.device))
        model.eval()
        return model

    cpp_to_pseudo_model = load_model("cpp_to_pseudo_epoch_1.pth", CppToPseudoConfig)
    pseudo_to_cpp_model = load_model("transformer_epoch_1.pth", PseudoToCppConfig)
    # Greedy decoding: repeatedly feed the partial output back in and append
    # the highest-probability next token until <end> or max_length is hit.
    def translate(model, input_tokens, vocab, device, max_length=50):
        model.eval()
        input_ids = [vocab.get(token, vocab["<unk>"]) for token in input_tokens]
        # Clip to the positional-encoding window (max_len=100) so overlong
        # inputs cannot overrun the precomputed encoding table.
        input_ids = input_ids[:100]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)
        output_ids = [vocab["<start>"]]
        for _ in range(max_length):
            output_tensor = torch.tensor(output_ids, dtype=torch.long).unsqueeze(0).to(device)
            with torch.no_grad():
                predictions = model(input_tensor, output_tensor)
            next_token_id = predictions.argmax(dim=-1)[:, -1].item()
            output_ids.append(next_token_id)
            if next_token_id == vocab["<end>"]:
                break
        id_to_token = {idx: token for token, idx in vocab.items()}
        return " ".join(id_to_token.get(idx, "<unk>") for idx in output_ids[1:])
    # UI elements for translation.
    mode = st.radio("Select Translation Mode", ("C++ → Pseudocode", "Pseudocode → C++"))
    user_input = st.text_area("Enter code:")

    if st.button("Translate"):
        tokens = user_input.strip().split()
        if mode == "C++ → Pseudocode":
            translated_code = translate(cpp_to_pseudo_model, tokens, vocab, CppToPseudoConfig.device)
        else:
            translated_code = translate(pseudo_to_cpp_model, tokens, vocab, PseudoToCppConfig.device)
        st.subheader("Generated Translation:")
        st.code(translated_code, language="cpp" if mode == "Pseudocode → C++" else "python")

    # Close the card and main-container divs opened at the top of the page.
    st.markdown('</div>', unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)
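
# Launch locally with `streamlit run app.py` (assuming this script is saved
# as app.py and the vocabulary/checkpoint files sit alongside it).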