finecode-python / app.py
ISilgiz's picture
Update app.py
9ea259a verified
import streamlit as st
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch
import re
# Helper functions for preprocessing the submitted code
def remove_comments(content: str) -> str:
    """Strip ``#`` comments and triple-quoted blocks from Python source text.

    The stripping is purely regex based (the text is never parsed), so it
    also works on code that is not syntactically valid. The trade-off is
    that a ``#`` or a triple quote that appears inside an ordinary string
    literal is treated as a comment/docstring delimiter as well.

    Args:
        content: Source code as a single string.

    Returns:
        The text with every ``#``-to-end-of-line span and every
        triple-quoted span (single or double quotes) removed. Newlines that
        terminated ``#`` comments are kept.
    """
    # Without DOTALL, "." stops at the newline, so only the rest of the
    # line after "#" is removed.
    content = re.sub(r'#.*', '', content)
    # DOTALL lets "." cross newlines so multi-line blocks are matched; the
    # non-greedy "*?" stops at the first closing delimiter.
    content = re.sub(r'""".*?"""', '', content, flags=re.DOTALL)
    content = re.sub(r"'''.*?'''", '', content, flags=re.DOTALL)
    return content
def split_instructions(content: str) -> list:
    """Group source lines into one-line "instructions" based on indentation.

    Comments are removed first (via ``remove_comments``) and blank lines are
    dropped. A line starts a new instruction when its indentation is less
    than or equal to the indentation of the line that opened the current
    instruction; more deeply indented lines are appended to the current
    instruction. The stripped lines of each instruction are joined with a
    single space.

    Args:
        content: Source code as a single string.

    Returns:
        A list of strings, one per instruction, in source order. Empty when
        the input has no non-blank lines after comment removal.
    """
    content = remove_comments(content)
    # Keep only non-blank lines; leading whitespace is preserved because it
    # is needed to measure indentation below.
    lines = [line.rstrip() for line in content.splitlines() if line.strip()]

    instructions = []
    current_instruction = []
    # Indentation of the line that opened the current instruction; None
    # until the first line has been seen.
    block_indent = None

    for line in lines:
        current_indent = len(line) - len(line.lstrip())
        if block_indent is None:
            # First line: it opens the first instruction.
            block_indent = current_indent
        elif current_indent <= block_indent:
            # Same or shallower indentation closes the current instruction
            # (always non-empty here, since at least one line was added).
            instructions.append(' '.join(current_instruction).strip())
            current_instruction = []
            block_indent = current_indent
        current_instruction.append(line.strip())

    # Flush the trailing instruction, if any lines were seen at all.
    if current_instruction:
        instructions.append(' '.join(current_instruction).strip())
    return instructions
# Load the model and tokenizer from Hugging Face.
model_name = "models"  # Set this to the model repository name on Hugging Face


@st.cache_resource
def _load_model_and_tokenizer(name):
    """Load the classifier and its tokenizer once and reuse them.

    Streamlit re-executes the script on every widget interaction;
    ``st.cache_resource`` (Streamlit >= 1.18) keeps the loaded objects so the
    weights are not re-read on each rerun.

    Args:
        name: Model repository name or local path passed to
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = RobertaForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = RobertaTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(model_name)

# Build the Streamlit interface.
st.title("Классификатор кода")
code = st.text_area("Введите ваш код здесь", height=300)
if st.button("Классифицировать код"):
    formatted_code = '\n'.join(split_instructions(code))
    if not formatted_code:
        # Nothing left after stripping comments/blank lines: do not run the
        # model on an empty sequence and report a meaningless label.
        st.warning("Введите код для классификации.")
    else:
        # Cap the sequence length so long inputs cannot index past the
        # position-embedding table. NOTE(review): the "- 2" assumes the
        # standard RoBERTa position offset (padding index 1) — confirm for
        # this checkpoint.
        max_length = min(
            tokenizer.model_max_length,
            model.config.max_position_embeddings - 2,
        )
        inputs = tokenizer(
            formatted_code,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=1).item()
        # Class index 1 is reported as "correct", anything else as "incorrect".
        result = "Правильный" if prediction == 1 else "Неправильный"
        st.write(f"Классификация: {result}")
# Personal details
st.markdown("---")  # Horizontal rule used as a section separator
for _profile_line in (
    "**Full Name:** Ilgiz Sulaymanov",
    "**Place of Study:** Zhusup Balasagyn Kyrgyz National University",
    "**Email:** sulaymanovilgiz00@gmail.com",
):
    # One markdown element per line, in the listed order.
    st.markdown(_profile_line)
# Training data
st.markdown("---")  # Another horizontal rule as a section separator
st.markdown("### Model Training Metrics")

# Hard-coded evaluation figures shown on the page.
training_metrics = {
    'eval_loss': 0.09344028681516647,
    'eval_runtime': 229.0224,
    'eval_samples_per_second': 35.569,
    'eval_steps_per_second': 4.449,
    'epoch': 1.0,
}

# (format template, metrics key) pairs, rendered top to bottom.
for _template, _metric_key in (
    ("**Evaluation Loss:** {:.4f}", 'eval_loss'),
    ("**Evaluation Runtime:** {:.2f} seconds", 'eval_runtime'),
    ("**Samples per Second:** {:.3f}", 'eval_samples_per_second'),
    ("**Steps per Second:** {:.3f}", 'eval_steps_per_second'),
    ("**Epoch:** {:.1f}", 'epoch'),
):
    st.markdown(_template.format(training_metrics[_metric_key]))