Spaces:
Runtime error
Runtime error
import ast
import json

from bs4 import BeautifulSoup
def parse_scene_document(html_content):
    """Parse a scene HTML document into a plain dictionary.

    The parser reads three things from the markup:

    * the first ``<h3>``, whose text is split on ``': '`` and whose second
      segment is taken as the scene number;
    * the first ``<p>``, whose text (with any ``Synopsis:`` label removed)
      is the synopsis;
    * the first ``<table>``, whose first row is skipped as a header and
      whose remaining rows each describe one frame in six ``<td>`` columns:
      frame number, description, characters, narration, location, setting.

    Args:
        html_content: HTML markup of the scene document.

    Returns:
        A dict with keys ``'scene_number'`` (str), ``'synopsis'`` (str) and
        ``'frames'`` (list of dicts keyed by ``'frame_num'``,
        ``'description'``, ``'characters'``, ``'narration'``, ``'location'``
        and ``'setting'``). A missing header or synopsis yields ``''``; a
        missing table yields an empty ``'frames'`` list.

    Raises:
        ValueError, SyntaxError: If a non-empty characters cell does not
            contain a valid Python literal (e.g. ``"['Ann', 'Bob']"``).
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Scene header: keep the second ': '-separated segment; if there is no
    # separator fall back to the whole header instead of raising IndexError.
    scene_number = ''
    header_tag = soup.find('h3')
    if header_tag is not None:
        header_text = header_tag.text
        header_parts = header_text.split(': ')
        if len(header_parts) > 1:
            scene_number = header_parts[1]
        else:
            scene_number = header_text.strip()

    # Extract synopsis (empty when the document has no <p>).
    synopsis = ''
    synopsis_tag = soup.find('p')
    if synopsis_tag is not None:
        synopsis = synopsis_tag.text.replace('Synopsis:', '').strip()

    # Extract frames from table.
    frames = []
    table = soup.find('table')
    if table is not None:
        for row in table.find_all('tr')[1:]:  # Skip header row
            cells = [cell.text.strip() for cell in row.find_all('td')]
            if len(cells) < 6:
                # Not a complete frame row (e.g. a <th>-only or spacer row).
                continue

            # SECURITY: the cell text comes from the HTML document, so it
            # must never be passed to eval(). literal_eval only accepts
            # Python literals and cannot execute code.
            characters = ast.literal_eval(cells[2]) if cells[2] else []

            frames.append({
                'frame_num': cells[0],
                'description': cells[1],
                'characters': characters,
                'narration': cells[3],
                'location': cells[4],
                'setting': cells[5],
            })

    return {
        'scene_number': scene_number,
        'synopsis': synopsis,
        'frames': frames,
    }