File size: 1,736 Bytes
eaa3d8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import re
def delete_special(pre_text, character_list):
    """Return *pre_text* with every entry of *character_list* removed.

    The entries are removed one after another, in the order given, each
    removal operating on the result of the previous one. An entry may be
    longer than one character; every occurrence of it is deleted.
    """
    cleaned = pre_text
    for unwanted in character_list:
        # Replacing with the empty string deletes all occurrences at once.
        cleaned = cleaned.replace(unwanted, "")
    return cleaned
def break_down2scenes(text: str, *, check_sequence: bool = False) -> list:
    """Split *text* into scene chunks at every ``s#<number>`` marker.

    Each returned string is the marker, a newline, and the stripped text
    that follows it up to the next marker (just the marker when nothing
    follows it).

    Args:
        text: The text to split.
        check_sequence: When true, require every marker number to be
            exactly one greater than the previous marker's number.
            Disabled by default, so out-of-order numbers are accepted.

    Returns:
        A list of scene strings in order of appearance. An empty or
        whitespace-only *text* gives ``[]``. If *text* has non-blank
        content before the first marker (including the case where it has
        no marker at all), the result is a one-element list holding that
        leading content unchanged.

    Raises:
        ValueError: If ``check_sequence`` is true and a marker number does
            not directly follow the previous one.
    """
    # Because the pattern has a capturing group, re.split always yields
    # [leading, marker, body, marker, body, ...]. Markers and bodies are
    # paired by position, so an empty body cannot shift later pairs.
    parts = re.split(r'(s#\d+)', text)

    leading = parts[0]
    if leading.strip():
        # NOTE(review): any scenes after unmarked leading text are not
        # returned. This keeps the long-standing fallback result for such
        # input; confirm with callers whether the scenes should be kept.
        return [leading]

    scenes = []
    previous_number = None
    for marker, body in zip(parts[1::2], parts[2::2]):
        if check_sequence:
            number = int(marker.split('#')[1])
            if previous_number is not None:
                expected = previous_number + 1
                if number != expected:
                    raise ValueError(
                        f"Unexpected scene number: {number}, expected {expected}"
                    )
            previous_number = number
        # The outer strip drops the trailing newline left by an empty body.
        scenes.append(f"{marker}\n{body.strip()}".strip())
    return scenes
|