File size: 3,293 Bytes
f376da5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea259a
 
f376da5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch
import re

# Функции для обработки кода
# One left-to-right scan that recognises string literals, so a '#' inside a
# string is never mistaken for the start of a comment.
_STRING_OR_COMMENT_RE = re.compile(
    r'(?P<triple>""".*?"""|' r"'''.*?''')"
    r'|(?P<plain>"(?:\\.|[^"\\\n])*"|' r"'(?:\\.|[^'\\\n])*')"
    r"|(?P<comment>#[^\n]*)",
    re.DOTALL,
)


def remove_comments(content):
    """Strip '#' comments and triple-quoted strings from Python source text.

    Triple-quoted strings (docstrings included) and '#' comments are deleted;
    ordinary single-line string literals are kept verbatim, even when they
    contain a '#' character. Line breaks outside the removed spans are
    preserved, so the result still has one physical line per source line.

    Args:
        content: Source code as a single string.

    Returns:
        The source text with comments and triple-quoted strings removed.
    """
    def _keep_plain_strings(match):
        # Only ordinary string literals survive; everything else the pattern
        # matches is a comment or a triple-quoted string and is dropped.
        return match.group(0) if match.lastgroup == 'plain' else ''

    return _STRING_OR_COMMENT_RE.sub(_keep_plain_strings, content)

def split_instructions(content):
    """Collapse source code into one string per top-level instruction.

    Comments are removed first and blank lines are ignored. A line starts a
    new instruction when its indentation is less than or equal to the
    indentation of the line that opened the current instruction; more deeply
    indented lines are appended to the current instruction. The stripped
    lines of each instruction are joined with single spaces.

    Args:
        content: Source code as a single string.

    Returns:
        A list of instruction strings in source order.
    """
    groups = []
    head_indent = 0

    for raw_line in remove_comments(content).splitlines():
        text = raw_line.strip()
        if not text:
            # Blank (or whitespace-only) lines never influence grouping.
            continue

        # Number of leading whitespace characters on this line.
        indent = len(raw_line) - len(raw_line.lstrip())

        if not groups or indent <= head_indent:
            # Same level or shallower than the opener: begin a new instruction.
            groups.append([text])
            head_indent = indent
        else:
            # Deeper than the opener: still part of the current instruction.
            groups[-1].append(text)

    return [' '.join(parts).strip() for parts in groups]

# Load the model and tokenizer from Hugging Face
model_name = "models"  # Set this to the model repository name on Hugging Face


def _load_model_and_tokenizer(name):
    """Load the sequence classifier and its tokenizer for *name*.

    Args:
        name: Local directory or Hugging Face repository id.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = RobertaForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = RobertaTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


# Streamlit re-executes this script on every widget interaction; without a
# resource cache the weights would be reloaded on each button click.
# Older Streamlit releases expose the same facility as experimental_singleton;
# if neither exists the loader simply runs uncached, as before.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
)
if _cache_resource is not None:
    _load_model_and_tokenizer = _cache_resource(_load_model_and_tokenizer)

model, tokenizer = _load_model_and_tokenizer(model_name)

# Build the Streamlit interface
st.title("Классификатор кода")

code = st.text_area("Введите ваш код здесь", height=300)


if st.button("Классифицировать код"):
    formatted_code = '\n'.join(split_instructions(code))
    if not formatted_code:
        # Nothing left after stripping comments and blank lines: ask for input
        # instead of classifying an empty string.
        st.warning("Пожалуйста, введите код для классификации.")
    else:
        # Inputs longer than the position-embedding table make the forward
        # pass fail, so truncate. RoBERTa offsets position ids past the
        # padding index, leaving max_position_embeddings - 2 usable slots
        # (NOTE(review): confirm against the deployed checkpoint's config).
        max_tokens = min(
            tokenizer.model_max_length,
            model.config.max_position_embeddings - 2,
        )
        inputs = tokenizer(
            formatted_code,
            return_tensors="pt",
            truncation=True,
            max_length=max_tokens,
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=1).item()
        result = "Правильный" if prediction == 1 else "Неправильный"
        st.write(f"Классификация: {result}")



# Author details, rendered below a horizontal rule that separates them from
# the classifier UI.
for _author_line in (
    "---",
    "**Full Name:** Ilgiz Sulaymanov",
    "**Place of Study:** Zhusup Balasagyn Kyrgyz National University",
    "**Email:** sulaymanovilgiz00@gmail.com",
):
    st.markdown(_author_line)

# Training data, rendered below another horizontal rule.
st.markdown("---")
st.markdown("### Model Training Metrics")
training_metrics = {
    'eval_loss': 0.09344028681516647,
    'eval_runtime': 229.0224,
    'eval_samples_per_second': 35.569,
    'eval_steps_per_second': 4.449,
    'epoch': 1.0
}

# (display template, metric key) pairs, in the order they are shown.
_metric_rows = (
    ("**Evaluation Loss:** {:.4f}", 'eval_loss'),
    ("**Evaluation Runtime:** {:.2f} seconds", 'eval_runtime'),
    ("**Samples per Second:** {:.3f}", 'eval_samples_per_second'),
    ("**Steps per Second:** {:.3f}", 'eval_steps_per_second'),
    ("**Epoch:** {:.1f}", 'epoch'),
)
for _template, _metric_key in _metric_rows:
    st.markdown(_template.format(training_metrics[_metric_key]))