Spaces:
Sleeping
Sleeping
File size: 1,200 Bytes
822e133 dd56795 1efb276 9670299 52f310f 822e133 5b2afd8 1efb276 dd56795 5b2afd8 dd56795 5b2afd8 dd56795 5b2afd8 dd56795 5b2afd8 1efb276 dd56795 5b2afd8 dd56795 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from transformers import LongformerTokenizerFast, LongformerForSequenceClassification, pipeline
import json
import os

# Model for Russian text.
model_name = "kazzand/ru-longformer-base-4096"
# Use the tokenizer class that the file actually imports; the previous
# `RobertaTokenizer` name was never imported and raised NameError at startup.
tokenizer = LongformerTokenizerFast.from_pretrained(model_name)
model = LongformerForSequenceClassification.from_pretrained(model_name)
# NOTE(review): if this checkpoint ships without a fine-tuned classification
# head, the head is freshly initialized and predictions are not meaningful
# until the model is fine-tuned -- confirm against the model card.

# Build the classifier pipeline.
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

folder_path = "./music_text/lyrics"  # Make sure this path is correct.
results = []
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if not os.path.isfile(file_path):
        # Skip anything that is not a regular file.
        continue
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    # Analyze the whole text; no truncation or length limit is requested.
    # NOTE(review): inputs longer than the model's maximum sequence length
    # may fail here -- consider passing truncation=True if that happens.
    result = classifier(text)
    results.append({"filename": filename, "result": result})

# Save the collected results as pretty-printed UTF-8 JSON.
with open("results.json", "w", encoding="utf-8") as output_file:
    json.dump(results, output_file, ensure_ascii=False, indent=4)
|