File size: 1,736 Bytes
eaa3d8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import re

def delete_special(pre_text, character_list):
    """Return ``pre_text`` with every entry of ``character_list`` removed.

    Entries are removed one after another, in iteration order, using
    ``str.replace``; an entry may therefore be longer than one character,
    and a removal performed for an earlier entry can expose a new match
    for a later one.

    Args:
        pre_text: The string to clean.
        character_list: Iterable of substrings to delete from ``pre_text``.

    Returns:
        The cleaned string.
    """
    cleaned = pre_text
    for unwanted in character_list:
        # Replacing with the empty string deletes every occurrence.
        cleaned = cleaned.replace(unwanted, "")
    return cleaned

def break_down2scenes(text: str, *, check_sequence: bool = False):
    """Split ``text`` into scenes at every ``s#<number>`` marker.

    Each returned scene is the marker, a newline, and the stripped text
    that follows it up to the next marker (just the marker when that text
    is blank). Non-blank text that appears before the first marker is kept
    unchanged as the first element, so a text without any marker comes
    back as a single-element list and a blank text yields an empty list.

    Args:
        text: The script to split.
        check_sequence: When true, require every scene number to be
            exactly one greater than the previous one. Off by default, so
            scene numbering is not validated unless requested.

    Returns:
        A list of scene strings in order of appearance.

    Raises:
        ValueError: If ``check_sequence`` is true and a scene number does
            not directly follow its predecessor.
    """
    # Because the pattern has a capturing group, re.split always yields
    # [preamble, marker_1, body_1, marker_2, body_2, ...]. Empty strings
    # must stay in place: dropping them would shift markers into body
    # slots whenever a scene has no text or the input has a preamble.
    parts = re.split(r'(s#\d+)', text)
    preamble = parts[0]
    markers = parts[1::2]
    bodies = parts[2::2]

    scene_texts = []
    if preamble.strip():
        scene_texts.append(preamble)

    previous_number = None
    for marker, body in zip(markers, bodies):
        if check_sequence:
            # The marker matched 's#\d+', so the part after '#' is numeric.
            scene_number = int(marker.split('#')[1])
            if previous_number is not None:
                expected_scene_number = previous_number + 1
                if scene_number != expected_scene_number:
                    raise ValueError(f"Unexpected scene number: {scene_number}, expected {expected_scene_number}")
            previous_number = scene_number

        # The outer strip removes the trailing newline left by an empty body.
        scene_texts.append(f"{marker}\n{body.strip()}".strip())

    return scene_texts