Spaces:
Running
Running
| import re | |
| import math | |
# Maximum accepted script size, in characters.
MAX_SCRIPT_LENGTH = 10_000

# Per-character rate shared by tts-1 and tts-1-hd: $30 per 1M characters.
TTS_1_HD_COST_PER_CHAR = 30 / 1_000_000

# gpt-4o-mini-tts rate: $0.015 per minute, expressed per second.
GPT_4O_MINI_TTS_COST_PER_SECOND = 0.015 / 60

# Average number of characters spoken per second; only used for estimation.
CHARS_PER_SECOND_ESTIMATE = 12
def parse_dialogue_script(script_text):
    """
    Split a dialogue script into speaker/utterance records.

    Every non-blank line is expected to look like "[Speaker] Utterance".
    A line that does not start with a "[...]" tag is attributed to
    "Narrator"; an empty tag ("[] text") is attributed to "UnknownSpeaker".
    Lines that end up with no utterance text (e.g. a bare "[Speaker]") are
    dropped.

    Args:
        script_text: The whole script as one string.

    Returns:
        A tuple ``(parsed_lines, total_chars)``. ``parsed_lines`` is a list
        of dicts with the keys "id", "speaker" and "text", where "id" is the
        zero-based position of the line in the stripped script (blank and
        dropped lines still consume an index). ``total_chars`` is the summed
        length of every "text" value.

    Raises:
        ValueError: If ``script_text`` (measured before any stripping) is
            longer than MAX_SCRIPT_LENGTH characters.
    """
    raw_lines = script_text.strip().split('\n')

    if len(script_text) > MAX_SCRIPT_LENGTH:
        raise ValueError(
            f"Script is too long. Maximum {MAX_SCRIPT_LENGTH} characters allowed. "
            f"Your script has {len(script_text)} characters."
        )

    # Leading "[tag]" (non-greedy), optional whitespace, then the rest of the line.
    tag_pattern = re.compile(r'\[(.*?)\]\s*(.*)')

    entries = []
    for index, raw in enumerate(raw_lines):
        stripped = raw.strip()
        if not stripped:
            continue  # blank line

        tagged = tag_pattern.match(stripped)
        if tagged is None:
            # No speaker tag: the whole line is narration.
            speaker, text = "Narrator", stripped
        else:
            # An empty tag such as "[] Text" falls back to a placeholder name.
            speaker = tagged.group(1).strip() or "UnknownSpeaker"
            text = tagged.group(2).strip()

        # A tag with nothing after it produces no record.
        if text:
            entries.append({"id": index, "speaker": speaker, "text": text})

    return entries, sum(len(entry["text"]) for entry in entries)
def calculate_cost(total_chars, num_lines, model_name="tts-1-hd"):
    """
    Estimate the cost of synthesizing a script with the given TTS model.

    Args:
        total_chars: Number of characters to be synthesized.
        num_lines: Number of parsed lines. Accepted for the caller's
            convenience; the estimate does not use it.
        model_name: TTS model identifier. "gpt-4o-mini-tts" is priced by
            estimated audio duration; every other name (including "tts-1"
            and "tts-1-hd") is priced per character.

    Returns:
        The estimated cost, in dollars.
    """
    if model_name == "gpt-4o-mini-tts":
        # Duration is approximated from the character count. A non-positive
        # speaking rate would make the division meaningless, so fall back
        # to 10 characters per second in that case.
        chars_per_second = (
            10.0 if CHARS_PER_SECOND_ESTIMATE <= 0 else CHARS_PER_SECOND_ESTIMATE
        )
        return (total_chars / chars_per_second) * GPT_4O_MINI_TTS_COST_PER_SECOND

    # "tts-1" and "tts-1-hd" share one per-character rate; unrecognized model
    # names are deliberately priced the same way instead of raising.
    return total_chars * TTS_1_HD_COST_PER_CHAR
if __name__ == '__main__':
    # Manual smoke run: prints parser and cost-estimator results for a few
    # representative scripts. Leading/trailing whitespace on each script line
    # is irrelevant because the parser strips every line.
    mixed_script = """
[Alice] Hello Bob, how are you?
[Bob] I'm fine, Alice. And you?
This is a line without a speaker tag.
[Charlie] Just listening in.
[] This line has an empty speaker tag.
[EmptySpeakerText]
"""

    print("--- Test Case 1: Mixed Script ---")
    entries, tts_chars = parse_dialogue_script(mixed_script)
    print("Parsed Lines:")
    for entry in entries:
        print(entry)
    print(f"\nTotal Characters for TTS: {tts_chars}")

    # Both per-character models are reported with the same line format.
    for model in ("tts-1-hd", "tts-1"):
        estimate = calculate_cost(tts_chars, len(entries), model)
        print(f"Estimated cost for {model}: ${estimate:.6f}")

    # gpt-4o-mini-tts uses the per-second formula.
    mini_estimate = calculate_cost(tts_chars, len(entries), "gpt-4o-mini-tts")
    print(
        f"Estimated cost for gpt-4o-mini-tts "
        f"(at {CHARS_PER_SECOND_ESTIMATE} chars/sec): ${mini_estimate:.6f}"
    )

    print("\n--- Test Case 2: Long Script (Boundary Check) ---")
    # Build a script that is exactly MAX_SCRIPT_LENGTH characters long.
    speaker_prefix = "[SpeakerA] "
    boundary_script = speaker_prefix + "a" * (MAX_SCRIPT_LENGTH - len(speaker_prefix))
    _, boundary_chars = parse_dialogue_script(boundary_script)
    print(
        f"Long script (length {len(boundary_script)} chars) parsed successfully. "
        f"TTS Chars: {boundary_chars}"
    )

    # One character over the limit must be rejected.
    try:
        parse_dialogue_script("a" * (MAX_SCRIPT_LENGTH + 1))
    except ValueError as err:
        print(f"Correctly caught error for too long script: {err}")

    print("\n--- Test Case 3: Empty and Invalid Scripts ---")
    empty_entries, empty_chars = parse_dialogue_script("")
    print(f"Empty script: Parsed lines: {len(empty_entries)}, Chars: {empty_chars}")

    blank_entries, blank_chars = parse_dialogue_script("\n\n[Speaker]\n\n")
    print(
        f"Script with blank/invalid lines: Parsed lines: {len(blank_entries)}, "
        f"Chars: {blank_chars} (Result: {blank_entries})"
    )