import streamlit as st
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch
import re

# Функции для обработки кода
# One left-to-right pass over the source. Alternatives are tried in order at
# each position, so a '#' or quote that sits inside an earlier match is never
# reinterpreted on its own:
#   block   - triple-quoted strings (docstrings / block "comments"), removed
#   literal - ordinary single-line string literals, kept verbatim
#   comment - '#' up to (not including) the end of the line, removed
_COMMENT_OR_STRING = re.compile(
    r'(?P<block>""".*?"""|\'\'\'.*?\'\'\')'
    r'|(?P<literal>"(?:\\.|[^"\\\n])*"|\'(?:\\.|[^\'\\\n])*\')'
    r'|(?P<comment>#[^\n]*)',
    flags=re.DOTALL,
)


def remove_comments(content):
    """Strip ``#`` comments and triple-quoted blocks from Python source text.

    The text is scanned once instead of running three independent
    substitutions. This avoids two failure modes of sequential passes:
    a ``#`` inside a docstring swallowing the closing triple quote, and a
    ``#`` inside an ordinary string literal truncating the line.

    Args:
        content: Source code as a single string.

    Returns:
        The source with comments and triple-quoted strings removed. Newlines
        that terminated a comment are preserved; ordinary string literals are
        left untouched.
    """
    def _keep_literals(match):
        # Only ordinary string literals survive; comments and triple-quoted
        # blocks are replaced by nothing.
        return match.group('literal') or ''

    return _COMMENT_OR_STRING.sub(_keep_literals, content)

def split_instructions(content):
    """Group source lines into instructions based on indentation.

    Comments are removed via ``remove_comments`` and blank lines are dropped.
    A new instruction starts on the first line and on every later line whose
    indentation is less than or equal to the indentation of the line that
    opened the current instruction; more deeply indented lines are folded
    into the current instruction.

    Args:
        content: Source code as a single string.

    Returns:
        A list of strings, each one instruction with its lines stripped and
        joined by single spaces.
    """
    cleaned = remove_comments(content)
    rows = [raw.rstrip() for raw in cleaned.splitlines() if raw.strip()]

    groups = []
    anchor_indent = None  # indentation of the line that opened the current group

    for row in rows:
        indent = len(row) - len(row.lstrip())

        # Open a new group on the very first line, or whenever the line is
        # not nested deeper than the current group's opening line.
        if anchor_indent is None or indent <= anchor_indent:
            groups.append([])
            anchor_indent = indent

        groups[-1].append(row.strip())

    return [' '.join(parts).strip() for parts in groups]

# Load the model and tokenizer from Hugging Face
model_name = "models"  # Set this to the model repository name on Hugging Face


# Streamlit re-executes the whole script on every widget interaction, so
# loading is wrapped in a cached resource to avoid re-reading the weights on
# each rerun. NOTE(review): st.cache_resource needs a reasonably recent
# Streamlit release - confirm the deployed version provides it.
@st.cache_resource
def _load_model_and_tokenizer(name):
    """Load the sequence-classification model and its tokenizer once.

    Args:
        name: Model identifier or path passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = RobertaForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = RobertaTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(model_name)

# Build the Streamlit interface
st.title("Классификатор кода")

code = st.text_area("Введите ваш код здесь", height=300)


if st.button("Классифицировать код"):
    formatted_code = '\n'.join(split_instructions(code))

    if not formatted_code:
        # Nothing left after stripping comments and blank lines: do not run
        # the model on an empty sequence.
        st.warning("Введите код для классификации.")
    else:
        # Truncate long inputs so the sequence fits the model's position
        # embeddings instead of raising inside the forward pass.
        # NOTE(review): RoBERTa offsets position ids past the padding index,
        # hence the "- 2"; confirm against the checkpoint's config.
        max_tokens = model.config.max_position_embeddings - 2
        inputs = tokenizer(
            formatted_code,
            return_tensors="pt",
            truncation=True,
            max_length=max_tokens,
        )

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs)

        prediction = torch.argmax(outputs.logits, dim=1).item()
        result = "Правильный" if prediction == 1 else "Неправильный"
        st.write(f"Классификация: {result}")


# Personal details
st.markdown("---")  # Horizontal rule separating this section from the classifier
for profile_line in (
    "**Full Name:** Ilgiz Sulaymanov",
    "**Place of Study:** Zhusup Balasagyn Kyrgyz National University",
    "**Email:** sulaymanovilgiz00@gmail.com",
):
    st.markdown(profile_line)

# Training data
st.markdown("---")  # Another horizontal rule
st.markdown("### Model Training Metrics")
training_metrics = {
    'eval_loss': 0.09344028681516647,
    'eval_runtime': 229.0224,
    'eval_samples_per_second': 35.569,
    'eval_steps_per_second': 4.449,
    'epoch': 1.0,
}

# Each entry pairs a display template with the metric key it renders.
metric_rows = (
    ("**Evaluation Loss:** {:.4f}", 'eval_loss'),
    ("**Evaluation Runtime:** {:.2f} seconds", 'eval_runtime'),
    ("**Samples per Second:** {:.3f}", 'eval_samples_per_second'),
    ("**Steps per Second:** {:.3f}", 'eval_steps_per_second'),
    ("**Epoch:** {:.1f}", 'epoch'),
)
for template, metric_key in metric_rows:
    st.markdown(template.format(training_metrics[metric_key]))