Spaces:

mechtnet
/

music_text

Sleeping

File size: 1,200 Bytes

822e133
dd56795
1efb276
9670299
52f310f
822e133
5b2afd8
1efb276
dd56795
5b2afd8
dd56795
5b2afd8
dd56795
 
5b2afd8
 
dd56795
 
 
5b2afd8
1efb276
 
dd56795
5b2afd8
dd56795

from transformers import LongformerTokenizerFast, LongformerForSequenceClassification, pipeline

# Model for Russian-language text.
model_name = "kazzand/ru-longformer-base-4096"
# Load the tokenizer with the Longformer tokenizer class imported at the top
# of the file; referencing a class that is not imported raises NameError here.
tokenizer = LongformerTokenizerFast.from_pretrained(model_name)
# NOTE(review): the checkpoint name suggests a *base* model; if it ships no
# fine-tuned sequence-classification head, the head is initialised randomly
# and the predicted labels/scores are not meaningful — confirm before relying
# on the output.
model = LongformerForSequenceClassification.from_pretrained(model_name)

# Build the classifier pipeline from the explicitly loaded model and tokenizer.
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

import json
import os

# Directory that holds the lyric files; make sure the path is correct.
folder_path = "./music_text/lyrics"
results = []

# Classify every regular file found directly inside the folder.
for entry_name in os.listdir(folder_path):
    entry_path = os.path.join(folder_path, entry_name)
    if not os.path.isfile(entry_path):
        # Skip sub-directories and anything else that is not a regular file.
        continue
    with open(entry_path, 'r', encoding='utf-8') as lyrics_file:
        lyrics = lyrics_file.read()
    # The full text is handed to the classifier as-is: no truncation or
    # length limit is applied at this call site.
    prediction = classifier(lyrics)
    results.append({"filename": entry_name, "result": prediction})

# Save the collected results as pretty-printed, non-ASCII-escaped JSON.
with open("results.json", "w", encoding="utf-8") as output_file:
    json.dump(results, output_file, ensure_ascii=False, indent=4)