Spaces:
Runtime error
Runtime error
File size: 1,590 Bytes
64cafec 8d2aba0 64cafec 8d2aba0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments, pipeline
from datasets import load_dataset
import gradio as gr
# Step 1: load the datasets.
# Each entry holds the positional arguments for one load_dataset() call:
# the dataset name, optionally followed by a configuration name.
# NOTE(review): 'babi' may not resolve to a dataset id on the Hub (bAbI is
# presumably published as 'babi_qa' with a required config) — confirm.
dataset_specs = [
    ('squad',),
    ('conll2003',),
    ('glue', 'mrpc'),
    ('trec',),
    ('babi',),
]
datasets = [load_dataset(*spec) for spec in dataset_specs]
# Step 2: load the model and the tokenizer.
# Both are loaded from the same checkpoint name so their vocabularies match.
# NOTE(review): 'distilbert-base-uncased' looks like a base (not fine-tuned)
# checkpoint, so the sequence-classification head is presumably freshly
# initialised and its scores are not meaningful until the model is
# fine-tuned — confirm.
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)
# Step 3: tokenization (the original comment also mentions model training,
# but nothing visible in this script calls Trainer).
def tokenize_function(examples):
    """Tokenize one batch of examples, padded/truncated to the max length.

    The datasets loaded by this script do not share a schema, so the text
    column(s) are chosen from whatever the batch actually contains instead of
    assuming a ``"text"`` column is always present.

    Args:
        examples: Mapping from column name to a list of values, as passed by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the batch.

    Raises:
        KeyError: If the batch has none of the supported text columns.
    """
    options = {"padding": "max_length", "truncation": True}

    # Single-text datasets: identical to the original behaviour.
    if "text" in examples:
        return tokenizer(examples["text"], **options)

    # Sentence-pair datasets: encode both sentences as one paired input.
    if "sentence1" in examples and "sentence2" in examples:
        return tokenizer(examples["sentence1"], examples["sentence2"], **options)

    # Question-answering datasets: encode the question with its context.
    if "question" in examples and "context" in examples:
        return tokenizer(examples["question"], examples["context"], **options)

    # Token-classification datasets: the text is already split into words.
    if "tokens" in examples:
        return tokenizer(examples["tokens"], is_split_into_words=True, **options)

    raise KeyError(
        "no supported text column in batch; available columns: "
        f"{sorted(examples)}"
    )
# Run the batched tokenizer over every loaded dataset, keeping the results
# in the same order as `datasets`.
tokenized_datasets = [
    dataset.map(tokenize_function, batched=True) for dataset in datasets
]
# Step 4: optimise the model with dynamic quantization.
# Only torch.nn.Linear modules are selected, with int8 (qint8) as the
# quantized dtype; the result is rebound to `model`.
model = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)
# Step 5: text classification function.
def classify_text(text):
    """Return the class probabilities the model predicts for ``text``.

    Args:
        text: Input string to classify.

    Returns:
        The softmax over the last axis of the model's logits, converted to
        nested Python lists with ``.tolist()``.
    """
    # truncation=True caps the input at the tokenizer's maximum length, so a
    # very long text no longer overflows the model's position embeddings.
    tokens = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**tokens)
    return torch.nn.functional.softmax(outputs.logits, dim=-1).tolist()
# Step 6: configure and start the Gradio interface.
# A text box feeds classify_text and its result is rendered as JSON.
interface = gr.Interface(fn=classify_text, inputs="text", outputs="json")
interface.launch()