"""Streamlit app that classifies a pasted code snippet with a RoBERTa model.

The snippet is stripped of comments, regrouped into one logical instruction
per line, tokenized, and passed to a sequence-classification model. The page
also shows author details and a fixed set of evaluation metrics.
"""

import re

import streamlit as st
import torch
from transformers import RobertaForSequenceClassification, RobertaTokenizer

# Upper bound on the number of tokens passed to the model.
# NOTE(review): 512 is the usual limit for RoBERTa checkpoints — confirm
# against the loaded model's configuration.
MAX_INPUT_TOKENS = 512


# --- Code preprocessing helpers ---------------------------------------------

def remove_comments(content):
    """Return *content* with comments and triple-quoted strings removed.

    The removal is purely regex based: everything from a ``#`` to the end of
    the line is dropped (including a ``#`` that appears inside a string
    literal), then every triple-quoted block of either quote style is dropped.

    NOTE(review): presumably this mirrors the preprocessing applied to the
    training data — confirm before changing the patterns.
    """
    content = re.sub(r'#.*', '', content)
    content = re.sub(r'""".*?"""', '', content, flags=re.DOTALL)
    content = re.sub(r"'''.*?'''", '', content, flags=re.DOTALL)
    return content


def split_instructions(content):
    """Split source text into a list of single-line instructions.

    Comments and blank lines are removed first. A line indented deeper than
    the line that opened the current instruction is treated as its
    continuation; a line at the same or a shallower indentation starts a new
    instruction. The lines of each instruction are stripped and joined with
    single spaces.

    Returns:
        A list of strings, one per instruction (empty if nothing is left
        after comment removal).
    """
    stripped_source = remove_comments(content)
    lines = [line.rstrip() for line in stripped_source.splitlines() if line.strip()]

    instructions = []
    current_instruction = []
    block_indent = None  # indentation of the line that opened the current instruction

    for line in lines:
        current_indent = len(line) - len(line.lstrip())
        if block_indent is None:
            # First non-empty line: it opens the first instruction.
            block_indent = current_indent
        elif current_indent <= block_indent:
            # Same or shallower indentation closes the current instruction.
            instructions.append(' '.join(current_instruction).strip())
            current_instruction = []
            block_indent = current_indent
        current_instruction.append(line.strip())

    if current_instruction:
        instructions.append(' '.join(current_instruction).strip())
    return instructions


# --- Model and tokenizer ----------------------------------------------------

# Name of the model repository on Hugging Face (or a local directory).
model_name = "models"


@st.cache_resource
def load_model_and_tokenizer(name):
    """Load the classification model and its tokenizer.

    Decorated with ``st.cache_resource`` so the objects are created once and
    reused, instead of being reloaded each time Streamlit re-executes the
    script after a user interaction.
    """
    loaded_model = RobertaForSequenceClassification.from_pretrained(name)
    loaded_tokenizer = RobertaTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


model, tokenizer = load_model_and_tokenizer(model_name)


# --- Streamlit interface ----------------------------------------------------

st.title("Классификатор кода")

code = st.text_area("Введите ваш код здесь", height=300)

if st.button("Классифицировать код"):
    formatted_code = '\n'.join(split_instructions(code))
    if not formatted_code:
        # Nothing left to classify (empty input or comments only).
        st.warning("Введите код для классификации.")
    else:
        # Truncate so long snippets cannot exceed the model's input limit.
        inputs = tokenizer(
            formatted_code,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_INPUT_TOKENS,
        )
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=1).item()
        result = "Правильный" if prediction == 1 else "Неправильный"
        st.write(f"Классификация: {result}")

# --- Personal details -------------------------------------------------------

st.markdown("---")  # horizontal rule used as a separator
st.markdown("**Full Name:** Ilgiz Sulaymanov")
st.markdown("**Place of Study:** Zhusup Balasagyn Kyrgyz National University")
st.markdown("**Email:** sulaymanovilgiz00@gmail.com")

# --- Training data ----------------------------------------------------------

st.markdown("---")  # second horizontal rule
st.markdown("### Model Training Metrics")

# Fixed values displayed on the page as-is.
training_metrics = {
    'eval_loss': 0.09344028681516647,
    'eval_runtime': 229.0224,
    'eval_samples_per_second': 35.569,
    'eval_steps_per_second': 4.449,
    'epoch': 1.0,
}

st.markdown("**Evaluation Loss:** {:.4f}".format(training_metrics['eval_loss']))
st.markdown("**Evaluation Runtime:** {:.2f} seconds".format(training_metrics['eval_runtime']))
st.markdown("**Samples per Second:** {:.3f}".format(training_metrics['eval_samples_per_second']))
st.markdown("**Steps per Second:** {:.3f}".format(training_metrics['eval_steps_per_second']))
st.markdown("**Epoch:** {:.1f}".format(training_metrics['epoch']))