
```python
import json
import os


class TranscriptLoader:
    def __init__(self, main_folder):
        # Define relative paths
        self.transcript_folder = os.path.join(main_folder, 'youtube_scrape', 'transcripts_commercial_archivist')
        self.json_file = os.path.join(self.transcript_folder, 'transcripts.json')
        self.data = self._load_data()

    def _load_data(self):
        # Load the JSON file
        with open(self.json_file, 'r') as f:
            json_content = f.read()

        # Fix the format by adding commas between objects and wrapping it in a list
        json_content = '[' + json_content.replace('}{', '},{') + ']'

        # Parse the JSON data
        try:
            data = json.loads(json_content)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            return []

        return data

    def get_transcript(self, index):
        try:
            entry = self.data[index]
            # Extract the brand name (first word in the title)
            brand = entry['video_title'].split()[0]
            # Load the transcript from the corresponding .txt file
            transcript_file = os.path.join(self.transcript_folder, f"commercialarchivist_{index}.txt")
            with open(transcript_file, 'r') as f:
                transcript = f.read()
            return brand, transcript
        except IndexError:
            print("Index out of range.")
        except FileNotFoundError:
            print(f"Transcript file not found for index {index}.")
        return None, None
```
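The `_load_data` step assumes `transcripts.json` contains JSON objects written back to back, with no separating commas and no enclosing array. A minimal sketch of that repair on a made-up two-entry sample (the titles and values below are assumptions for illustration; only the `video_title` key comes from the loader above):

```python
import json

# Hypothetical raw contents of transcripts.json: two objects concatenated
# with no comma between them and no surrounding list brackets (assumed format).
raw = '{"video_title": "Pepsi 1995 Commercial"}{"video_title": "Ford Truck Ad"}'

# Same repair as TranscriptLoader._load_data: insert commas and wrap in a list.
fixed = '[' + raw.replace('}{', '},{') + ']'
entries = json.loads(fixed)

print(len(entries))               # 2
print(entries[0]['video_title'])  # Pepsi 1995 Commercial
```

Note that this string-level repair would also split on a literal `}{` inside a field value, so it only works as long as the metadata values never contain that sequence.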

## Usage

```python
main_folder = '.'  # relative path to the main folder where this script is located
loader = TranscriptLoader(main_folder)
```

Example: load the transcript and brand of the first video:

```python
brand, transcript = loader.get_transcript(0)
print(f"Brand: {brand}")
print(f"Transcript:\n{transcript}")
```
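To process every scraped commercial rather than a single index, the loader can be iterated over its parsed metadata; `loader.data` holds one entry per video, so its length bounds the valid indices. A small sketch (not part of the original card):

```python
# Iterate over every loaded entry; indices line up with the
# commercialarchivist_{index}.txt files on disk.
for i in range(len(loader.data)):
    brand, transcript = loader.get_transcript(i)
    if transcript is None:
        continue  # missing or unreadable transcript file for this index
    print(f"{i}: {brand} ({len(transcript)} characters)")
```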
