Spaces:
Sleeping
Sleeping
import streamlit as st | |
import torch | |
import torch.nn as nn | |
import json | |
import math | |
# Page setup: browser-tab title and wide layout.
st.set_page_config(layout="wide", page_title="Code Conversion Tool")

# Stylesheet for the page: gradient background, a centered container,
# a card box, centered headings, and a green translate button with a
# darker hover state.
_CUSTOM_CSS = """
<style>
/* Set a subtle gradient background for the page */
body {
background: linear-gradient(135deg, #ece9e6, #ffffff);
font-family: 'Helvetica Neue', sans-serif;
}
/* Center container for the main app */
.main-container {
max-width: 800px;
margin: 3rem auto;
padding: 1rem;
}
/* Card style for a clean content box */
.card {
background: #ffffff;
border-radius: 10px;
box-shadow: 0px 4px 8px rgba(0,0,0,0.1);
padding: 2rem;
}
/* Center headings and remove underline */
h1, h2, h3 {
text-align: center;
text-decoration: none;
}
/* Style for the translation button */
.stButton>button {
background-color: #4CAF50;
color: white;
border: none;
padding: 0.5rem 1.5rem;
border-radius: 5px;
font-size: 1rem;
cursor: pointer;
}
.stButton>button:hover {
background-color: #45a049;
}
</style>
"""

# unsafe_allow_html is needed so the <style> element is emitted as HTML
# rather than shown as literal text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Render the page title inside its own container.
# NOTE(review): the original indentation was lost, so it is unclear how much
# of the app was nested under this `with`; only the title is placed here.
with st.container():
    st.title("Code Conversion Tool")

# Load the token -> id vocabulary shared by both models (no sidebar upload).
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's locale default.
with open("vocabulary.json", "r", encoding="utf-8") as f:
    vocab = json.load(f)
# Define separate configuration classes | |
class PseudoToCppConfig:
    """Hyperparameters for the pseudocode-to-C++ model."""

    # Vocabulary / sequence limits.
    vocab_size = 12006
    max_length = 100

    # Transformer dimensions.
    embed_dim = 256
    feedforward_dim = 512
    num_heads = 4
    num_layers = 3

    # Regularisation.
    dropout = 0.2

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = (
        torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    )
class CppToPseudoConfig:
    """Hyperparameters for the C++-to-pseudocode model."""

    # Vocabulary / sequence limits.
    vocab_size = 12006
    max_length = 100

    # Transformer dimensions.
    embed_dim = 256
    feedforward_dim = 512
    num_heads = 8
    num_layers = 2

    # Regularisation.
    dropout = 0.1

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = (
        torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    )
# Positional Encoding | |
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a batch of embeddings.

    Args:
        embed_dim: Size of the embedding (last) dimension. Odd sizes are
            supported; the last column then holds a sine term only.
        max_len: Number of positions precomputed in the encoding table.

    The table is stored as a non-persistent buffer named ``pe`` with shape
    ``(1, max_len, embed_dim)``. As a buffer it moves with the module on
    ``.to(device)``; being non-persistent it is excluded from
    ``state_dict()``, so checkpoints saved without it still load.
    """

    def __init__(self, embed_dim, max_len=100):
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, embed_dim, 2).float() * (-math.log(10000.0) / embed_dim)
        )
        angles = position * div_term

        pe = torch.zeros(max_len, embed_dim)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd embed_dim there is one fewer odd-indexed column than
        # even-indexed ones, so drop the surplus frequency for the cosines.
        pe[:, 1::2] = torch.cos(angles[:, : embed_dim // 2])

        self.register_buffer("pe", pe.unsqueeze(0), persistent=False)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions.

        ``x`` is indexed as ``(batch, seq, embed_dim)``: dimension 1 selects
        how many rows of the table are used.
        """
        # .to() is a no-op when the buffer already lives on x's device; it is
        # kept so inputs on a different device than the module still work.
        return x + self.pe[:, : x.size(1)].to(x.device)
# Transformer Model | |
class Seq2SeqTransformer(nn.Module):
    """Encoder-decoder transformer with one embedding table for both sides.

    ``config`` supplies ``vocab_size``, ``embed_dim``, ``max_length``,
    ``num_heads``, ``num_layers``, ``feedforward_dim`` and ``dropout``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        width = config.embed_dim

        self.embedding = nn.Embedding(config.vocab_size, width)
        self.positional_encoding = PositionalEncoding(width, config.max_length)
        self.transformer = nn.Transformer(
            d_model=width,
            nhead=config.num_heads,
            num_encoder_layers=config.num_layers,
            num_decoder_layers=config.num_layers,
            dim_feedforward=config.feedforward_dim,
            dropout=config.dropout,
        )
        self.fc_out = nn.Linear(width, config.vocab_size)

    def forward(self, src, tgt):
        """Map source and target token ids to per-position vocabulary scores.

        Both inputs are treated as ``(batch, seq)`` id tensors. No attention
        or padding masks are passed to the transformer.
        """
        scale = math.sqrt(self.config.embed_dim)

        # Embed, scale and add positions, then swap to the sequence-first
        # layout that nn.Transformer uses when batch_first is not set.
        seq_first = [
            self.positional_encoding(self.embedding(ids) * scale).transpose(0, 1)
            for ids in (src, tgt)
        ]
        decoded = self.transformer(seq_first[0], seq_first[1])

        # Back to batch-first before projecting onto the vocabulary.
        return self.fc_out(decoded.transpose(0, 1))
# Load Models with the appropriate configuration | |
def load_model(path, config):
    """Build a Seq2SeqTransformer for ``config`` and load weights from ``path``.

    The model is created on ``config.device``, the checkpoint is mapped onto
    the same device, and the model is returned in evaluation mode.
    """
    network = Seq2SeqTransformer(config)
    network = network.to(config.device)
    # NOTE: torch.load unpickles the file, so only load trusted checkpoints.
    weights = torch.load(path, map_location=config.device)
    network.load_state_dict(weights)
    return network.eval()
# One model per translation direction, each paired with its own config.
cpp_to_pseudo_model = load_model(
    "cpp_to_pseudo_epoch_1.pth",
    CppToPseudoConfig,
)
pseudo_to_cpp_model = load_model(
    "transformer_epoch_1.pth",
    PseudoToCppConfig,
)
# Translation Function | |
def translate(model, input_tokens, vocab, device, max_length=50):
    """Greedily decode a translation of ``input_tokens`` with ``model``.

    Args:
        model: Callable as ``model(src_ids, tgt_ids)`` returning scores whose
            last dimension indexes the vocabulary; must provide ``eval()``.
        input_tokens: Sequence of source token strings.
        vocab: Mapping from token string to id. Must contain ``"<unk>"``,
            ``"<start>"`` and ``"<end>"``.
        device: Device on which the id tensors are created.
        max_length: Maximum number of tokens to generate.

    Returns:
        The generated tokens joined by single spaces. The ``<start>`` and
        ``<end>`` markers are not included. An empty ``input_tokens`` yields
        an empty string without running the model.

    Raises:
        KeyError: If a required special token is missing from ``vocab``.
    """
    model.eval()

    # An empty source gives the model nothing to condition on.
    if not input_tokens:
        return ""

    unk_id = vocab["<unk>"]
    end_id = vocab["<end>"]

    input_ids = [vocab.get(token, unk_id) for token in input_tokens]
    input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)

    output_ids = [vocab["<start>"]]
    with torch.no_grad():
        for _ in range(max_length):
            output_tensor = (
                torch.tensor(output_ids, dtype=torch.long).unsqueeze(0).to(device)
            )
            predictions = model(input_tensor, output_tensor)
            # Only the score row for the last target position decides the
            # next token.
            next_token_id = predictions[:, -1].argmax(dim=-1).item()
            if next_token_id == end_id:
                # Stop without emitting the end-of-sequence marker.
                break
            output_ids.append(next_token_id)

    id_to_token = {idx: token for token, idx in vocab.items()}
    # Skip the leading <start> id.
    return " ".join(id_to_token.get(idx, "<unk>") for idx in output_ids[1:])
# UI Elements for Translation | |
# Direction selector and source input.
# NOTE(review): the "β" in the labels looks like a mis-encoded arrow. It is
# left untouched because the comparisons below depend on the exact text.
mode = st.radio("Select Translation Mode", ("C++ β Pseudocode", "Pseudocode β C++"))
user_input = st.text_area("Enter code:")

if st.button("Translate"):
    # Whitespace tokenisation; split() already ignores leading and trailing
    # whitespace.
    tokens = user_input.split()
    if not tokens:
        # Nothing to translate: tell the user instead of running the model
        # on an empty sequence.
        st.warning("Please enter some code to translate.")
    else:
        if mode == "C++ β Pseudocode":
            translated_code = translate(
                cpp_to_pseudo_model, tokens, vocab, CppToPseudoConfig.device
            )
        else:
            translated_code = translate(
                pseudo_to_cpp_model, tokens, vocab, PseudoToCppConfig.device
            )
        st.subheader("Generated Translation:")
        # Highlight as C++ when C++ is the target, otherwise as Python.
        st.code(
            translated_code,
            language="cpp" if mode == "Pseudocode β C++" else "python",
        )

# Closing tags kept from the original; no matching opening <div> is emitted
# anywhere in the visible source.
st.markdown('</div>', unsafe_allow_html=True)
st.markdown('</div>', unsafe_allow_html=True)