import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments, pipeline
from datasets import load_dataset
import gradio as gr
# Step 1: Load the datasets.
datasets = [
    load_dataset('squad'),
    load_dataset('conll2003'),
    load_dataset('glue', 'mrpc'),
    load_dataset('trec'),
    # load_dataset('babi') was listed here, but 'babi' is not a valid Hub dataset id
    # (the bAbI tasks live under a different id such as 'facebook/babi_qa' and need a
    # task config), so it is left out to avoid a load-time error.
]
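# The remaining corpora do not share a schema (SQuAD has "question"/"context",
# CoNLL-2003 has "tokens", MRPC has "sentence1"/"sentence2", TREC has "text"),
# which is what the per-column lookup in tokenize_function below works around.
for ds in datasets:
    print(ds["train"].column_names)  # quick way to inspect each dataset's fields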
# Step 2: Load the model and tokenizer.
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)
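# Note: from_pretrained attaches a fresh, randomly initialised classification head
# with 2 labels by default; for a labelled dataset such as TREC (6 coarse classes)
# the head size would be set explicitly, e.g.:
# model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=6)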
# Step 3: Tokenize. The corpora use different column names, so examples["text"]
# raises a KeyError for most of them; pick the first usable text field instead.
# (The fine-tuning half of this step is sketched right after the loop.)
def tokenize_function(examples):
    for column in ("text", "question", "sentence1", "tokens"):
        if column in examples:
            texts = examples[column]
            if texts and isinstance(texts[0], list):  # CoNLL-2003: pre-split tokens
                texts = [" ".join(t) for t in texts]
            return tokenizer(texts, padding="max_length", truncation=True)
    raise KeyError(f"no usable text column among {list(examples.keys())}")
tokenized_datasets = []
for ds in datasets:
    tokenized_datasets.append(ds.map(tokenize_function, batched=True))
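# Optional fine-tuning sketch: Trainer and TrainingArguments are imported above but
# never used. This is one way they could be wired up, assuming MRPC
# (tokenized_datasets[2]) and a hypothetical output_dir "./results"; flip TRAIN to
# True to actually run it (before quantization below).
TRAIN = False
if TRAIN:
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=16,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets[2]["train"],
        eval_dataset=tokenized_datasets[2]["validation"],
    )
    trainer.train()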
# Step 4: Shrink the model with dynamic int8 quantization of its Linear layers.
model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
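# Quick sanity check (the attribute path is an assumption about DistilBERT's module
# layout): after quantize_dynamic, the Linear layers should print as dynamically
# quantized modules instead of plain nn.Linear.
print(model.distilbert.transformer.layer[0].ffn.lin1)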
# Step 5: Text-classification function for the Gradio demo.
def classify_text(text):
    tokens = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**tokens)
    return torch.nn.functional.softmax(outputs.logits, dim=-1).tolist()
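# Note: unless the model was fine-tuned above, the two-way head is randomly
# initialised, so these probabilities are essentially arbitrary;
# model.config.id2label (LABEL_0 / LABEL_1 by default) names the two scores.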
# Step 6: Wire the classifier into a Gradio interface.
interface = gr.Interface(fn=classify_text, inputs="text", outputs="json")
interface.launch()
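# On Hugging Face Spaces the bare launch() above is enough; when running locally,
# interface.launch(share=True) would also print a temporary public URL.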