import joblib
import numpy as np
import torch
from sklearn.linear_model import LogisticRegression
from transformers import AutoModel, AutoTokenizer

# Load the RuBERT encoder and its tokenizer.
rubert_model_name = "cointegrated/rubert-tiny2"
tokenizer = AutoTokenizer.from_pretrained(rubert_model_name)
model = AutoModel.from_pretrained(rubert_model_name)

# Load the pre-trained Logistic Regression classifier that maps RuBERT
# CLS embeddings to class labels.
logreg_model_path = "model_data/logreg_model_v2.joblib"
logreg_model = joblib.load(logreg_model_path)


def embed_bert_cls(text, model, tokenizer):
    """Return the L2-normalized [CLS] embedding(s) for `text` (a string or list of strings)."""
    inputs = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        outputs = model(**inputs)
    # The hidden state of the [CLS] token (position 0) serves as the sentence embedding.
    embeddings = outputs.last_hidden_state[:, 0, :]
    # L2-normalize so the downstream linear classifier sees unit-length vectors.
    embeddings = torch.nn.functional.normalize(embeddings)
    return embeddings.cpu().numpy()
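

# NOTE: The joblib artifact loaded above was presumably produced by fitting a
# Logistic Regression on these CLS embeddings. The function below is a minimal,
# hypothetical sketch of that training step -- `texts`, `labels`, the batch
# size, and the output path are illustrative assumptions, not the actual
# pipeline behind logreg_model_v2.joblib.
def train_toxicity_classifier(texts, labels, out_path="model_data/logreg_model_v2.joblib"):
    """Fit a Logistic Regression on RuBERT CLS embeddings and persist it with joblib."""
    # Embed in small batches; the tokenizer accepts a list of strings directly.
    batches = [texts[i:i + 32] for i in range(0, len(texts), 32)]
    X = np.vstack([embed_bert_cls(batch, model, tokenizer) for batch in batches])
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X, labels)  # labels expected as integers: 0 (Good), 1 (Neutral), 2 (Bad)
    joblib.dump(clf, out_path)
    return clf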


# Human-readable labels for the classifier's integer outputs.
CLASS_LABELS = {0: "Good", 1: "Neutral", 2: "Bad"}


def classify_text(text, model=model, tokenizer=tokenizer, classifier=logreg_model):
    """Classify `text` as Good, Neutral, or Bad using RuBERT embeddings and Logistic Regression."""
    embeddings = embed_bert_cls(text, model, tokenizer)
    prediction = classifier.predict(embeddings)
    return CLASS_LABELS[prediction[0]]
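

# Illustrative usage; the sample strings below are placeholders, not real data.
if __name__ == "__main__":
    samples = [
        "Спасибо, очень полезно!",  # "Thanks, very helpful!"
        "Ну такое...",              # "Meh..."
        "Ты что, совсем дурак?",    # "Are you a complete fool?"
    ]
    for sample in samples:
        print(f"{sample!r} -> {classify_text(sample)}")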