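# Streamlit app: bidirectional Pseudocode <-> C++ translation using two small
# seq2seq Transformer models, one trained per direction.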
import streamlit as st
import torch
import torch.nn as nn
import json
import math
# Configure the page for a wide layout.
st.set_page_config(page_title="Code Conversion Tool", layout="wide")
# Inject custom CSS for a modern, centered card design with a gradient background.
st.markdown(
    """
    <style>
    /* Set a subtle gradient background for the page */
    body {
        background: linear-gradient(135deg, #ece9e6, #ffffff);
        font-family: 'Helvetica Neue', sans-serif;
    }
    /* Center container for the main app */
    .main-container {
        max-width: 800px;
        margin: 3rem auto;
        padding: 1rem;
    }
    /* Card style for a clean content box */
    .card {
        background: #ffffff;
        border-radius: 10px;
        box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);
        padding: 2rem;
    }
    /* Center headings and remove underline */
    h1, h2, h3 {
        text-align: center;
        text-decoration: none;
    }
    /* Style for the translation button */
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border: none;
        padding: 0.5rem 1.5rem;
        border-radius: 5px;
        font-size: 1rem;
        cursor: pointer;
    }
    .stButton>button:hover {
        background-color: #45a049;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
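# Note: depending on the Streamlit version, CSS on the body selector may not
# take effect; targeting the .stApp container is a common alternative.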
# Wrap the app content in a centered, card-styled container.
with st.container():
    # Open the wrapper divs styled above; they are closed at the end of the script.
    st.markdown('<div class="main-container"><div class="card">', unsafe_allow_html=True)

    st.title("Code Conversion Tool")

    # Load the shared token-to-id vocabulary.
    with open("vocabulary.json", "r") as f:
        vocab = json.load(f)
    # Define separate configuration classes for the two translation directions.
    class PseudoToCppConfig:
        # Config for the Pseudocode → C++ model.
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 4
        num_layers = 3
        feedforward_dim = 512
        dropout = 0.2
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    class CppToPseudoConfig:
        # Config for the C++ → Pseudocode model.
        vocab_size = 12006
        max_length = 100
        embed_dim = 256
        num_heads = 8
        num_layers = 2
        feedforward_dim = 512
        dropout = 0.1
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
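    # These hyperparameters must match the values used when each checkpoint was
    # trained; otherwise load_state_dict below will fail with shape mismatches.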
    # Sinusoidal positional encoding (Vaswani et al., 2017).
    class PositionalEncoding(nn.Module):
        def __init__(self, embed_dim, max_len=100):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len, embed_dim)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(
                torch.arange(0, embed_dim, 2).float() * (-math.log(10000.0) / embed_dim)
            )
            pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
            pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
            # Non-persistent buffer: it moves with .to(device) but stays out of
            # the state_dict, so the existing checkpoints still load cleanly.
            self.register_buffer("pe", pe.unsqueeze(0), persistent=False)

        def forward(self, x):
            # x: (batch, seq_len, embed_dim)
            return x + self.pe[:, :x.size(1)]
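    # Each position t receives a deterministic vector:
    #   PE(t, 2i)   = sin(t / 10000^(2i / d_model))
    #   PE(t, 2i+1) = cos(t / 10000^(2i / d_model))
    # so the model can infer token order without learned position embeddings.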
    # Encoder-decoder Transformer with a single embedding table shared by
    # source and target tokens.
    class Seq2SeqTransformer(nn.Module):
        def __init__(self, config):
            super(Seq2SeqTransformer, self).__init__()
            self.config = config
            self.embedding = nn.Embedding(config.vocab_size, config.embed_dim)
            self.positional_encoding = PositionalEncoding(config.embed_dim, config.max_length)
            self.transformer = nn.Transformer(
                d_model=config.embed_dim,
                nhead=config.num_heads,
                num_encoder_layers=config.num_layers,
                num_decoder_layers=config.num_layers,
                dim_feedforward=config.feedforward_dim,
                dropout=config.dropout,
            )
            self.fc_out = nn.Linear(config.embed_dim, config.vocab_size)

        def forward(self, src, tgt):
            # src, tgt: (batch, seq_len) token ids.
            src_emb = self.embedding(src) * math.sqrt(self.config.embed_dim)
            tgt_emb = self.embedding(tgt) * math.sqrt(self.config.embed_dim)
            src_emb = self.positional_encoding(src_emb)
            tgt_emb = self.positional_encoding(tgt_emb)
            # nn.Transformer defaults to sequence-first tensors, so permute to
            # (seq_len, batch, embed_dim) and back.
            out = self.transformer(src_emb.permute(1, 0, 2), tgt_emb.permute(1, 0, 2))
            out = self.fc_out(out.permute(1, 0, 2))
            return out  # (batch, tgt_len, vocab_size) logits
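    # Note: no attention masks are applied here, which this script assumes
    # matches how the checkpoints were trained. Greedy decoding below only
    # reads the final decoder position, so generation works without a causal
    # mask (the last position can only attend to earlier tokens anyway).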
    # Load each model with its matching configuration. The leading underscore
    # tells st.cache_resource not to hash the config class; the checkpoint path
    # alone identifies each cached model.
    @st.cache_resource
    def load_model(path, _config):
        model = Seq2SeqTransformer(_config).to(_config.device)
        model.load_state_dict(torch.load(path, map_location=_config.device))
        model.eval()
        return model

    cpp_to_pseudo_model = load_model("cpp_to_pseudo_epoch_1.pth", CppToPseudoConfig)
    pseudo_to_cpp_model = load_model("transformer_epoch_1.pth", PseudoToCppConfig)
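    # st.cache_resource keeps both models in memory across Streamlit reruns,
    # so each checkpoint is deserialized only once per process.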
    # Greedy autoregressive decoding.
    def translate(model, input_tokens, vocab, device, max_length=50):
        model.eval()
        input_ids = [vocab.get(token, vocab["<unk>"]) for token in input_tokens]
        # The positional encoding only covers model.config.max_length positions,
        # so clamp overly long inputs instead of crashing.
        input_ids = input_ids[: model.config.max_length]
        input_tensor = torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)
        output_ids = [vocab["<start>"]]
        for _ in range(max_length):
            output_tensor = torch.tensor(output_ids, dtype=torch.long).unsqueeze(0).to(device)
            with torch.no_grad():
                predictions = model(input_tensor, output_tensor)
            # Take the most likely token at the final decoder position.
            next_token_id = predictions.argmax(dim=-1)[:, -1].item()
            if next_token_id == vocab["<end>"]:
                break
            output_ids.append(next_token_id)
        # Map ids back to tokens, dropping the leading <start>.
        id_to_token = {idx: token for token, idx in vocab.items()}
        return " ".join(id_to_token.get(idx, "<unk>") for idx in output_ids[1:])
    # UI elements for translation.
    mode = st.radio("Select Translation Mode", ("C++ → Pseudocode", "Pseudocode → C++"))
    user_input = st.text_area("Enter code:")

    if st.button("Translate"):
        tokens = user_input.strip().split()
        if not tokens:
            st.warning("Please enter some code to translate.")
        else:
            if mode == "C++ → Pseudocode":
                translated_code = translate(cpp_to_pseudo_model, tokens, vocab, CppToPseudoConfig.device)
            else:
                translated_code = translate(pseudo_to_cpp_model, tokens, vocab, PseudoToCppConfig.device)
            st.subheader("Generated Translation:")
            st.code(translated_code, language="cpp" if mode == "Pseudocode → C++" else "python")

    # Close the card and main-container wrappers opened above.
    st.markdown('</div></div>', unsafe_allow_html=True)