Spaces:

dev-immersfy
/

comic-pipeline-text

Runtime error

Added HTML viewer to Gradio UI

d895ad6 about 1 year ago

1.11 kB

	from bs4 import BeautifulSoup
	import json

	def parse_scene_document(html_content):
	    """Parse a scene HTML document into a plain dictionary.

	    The scene number is taken from the text of the first ``<h3>`` element
	    (the part following the first ``': '``), the synopsis from the first
	    ``<p>`` element with the ``'Synopsis:'`` label removed, and one frame
	    per data row of the first ``<table>`` (the first row is treated as the
	    header and skipped).

	    Args:
	        html_content: HTML markup of the scene document.

	    Returns:
	        A dict with the keys ``'scene_number'``, ``'synopsis'`` and
	        ``'frames'``. ``'frames'`` is a list of dicts with the keys
	        ``'frame_num'``, ``'description'``, ``'characters'``,
	        ``'narration'``, ``'location'`` and ``'setting'``; it is empty
	        when the document has no table.

	    Raises:
	        AttributeError: If the document has no ``<h3>`` or no ``<p>``.
	        IndexError: If the heading text contains no ``': '`` separator,
	            or a data row has fewer than six ``<td>`` cells.
	        ValueError, SyntaxError: If a non-empty "characters" cell is not
	            a valid Python literal.
	    """
	    # Imported locally so this function is self-contained with respect to
	    # the module's existing import block.
	    import ast

	    soup = BeautifulSoup(html_content, 'html.parser')
	    scene_header = soup.find('h3').text
	    scene_number = scene_header.split(': ')[1]

	    # Extract synopsis
	    synopsis = soup.find('p').text.replace('Synopsis:', '').strip()

	    # Extract frames from table
	    frames = []
	    table = soup.find('table')
	    if table:
	        rows = table.find_all('tr')[1:]  # Skip header row
	        for row in rows:
	            cells = row.find_all('td')
	            if not cells:
	                # Rows without any <td> (e.g. extra <th>-only header
	                # rows) carry no frame data.
	                continue

	            # SECURITY: the cell text comes from the HTML input, so it
	            # must never be passed to eval(). literal_eval only accepts
	            # Python literals (e.g. "['Alice', 'Bob']") and cannot
	            # execute code.
	            raw_characters = cells[2].text.strip()
	            characters = (
	                ast.literal_eval(raw_characters) if raw_characters else []
	            )

	            frame = {
	                'frame_num': cells[0].text.strip(),
	                'description': cells[1].text.strip(),
	                'characters': characters,
	                'narration': cells[3].text.strip(),
	                'location': cells[4].text.strip(),
	                'setting': cells[5].text.strip(),
	            }
	            # Debug trace retained so console output matches the
	            # previous behaviour.
	            print(frame)
	            frames.append(frame)

	    return {
	        'scene_number': scene_number,
	        'synopsis': synopsis,
	        'frames': frames,
	    }