Spaces:

Awlly
/

NLP_app

Sleeping

Awlly committed on Feb 2, 2024

Commit

4a76dec

1 Parent(s): 09e89d7

adjusted input processing for RuBERT model

Files changed (1) hide show

app_models/rubert_MODEL.py CHANGED Viewed

@@ -17,7 +17,7 @@ logreg_model = joblib.load(logreg_model_path)
 def embed_bert_cls(text, model, tokenizer):
     """Generate embeddings for input text using the RuBERT model."""
-    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     embeddings = outputs.last_hidden_state[:, 0, :]

 def embed_bert_cls(text, model, tokenizer):
     """Generate embeddings for input text using the RuBERT model."""
+    inputs = tokenizer(text, padding="max_length", truncation=True, max_length=128, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     embeddings = outputs.last_hidden_state[:, 0, :]