JaceWei committed on
Commit
0f74dc7
·
1 Parent(s): 90f8a2b
Paper2Poster/PosterAgent/__init__.py CHANGED
@@ -1,16 +1,16 @@
1
  from . import (
2
- apply_theme,
3
- create_dataset,
4
- deoverflow,
5
- deoverflow_parallel,
6
- fill_and_style,
7
- gen_outline_layout_parallel,
8
  gen_outline_layout,
9
- gen_poster_content,
10
- gen_pptx_code,
11
- LLM_direct_generate,
12
- new_pipeline,
13
  parse_raw,
14
- poster_gen_pipeline,
15
  tree_split_layout
16
  )
 
1
  from . import (
2
+ # apply_theme,
3
+ # create_dataset,
4
+ # deoverflow,
5
+ # deoverflow_parallel,
6
+ # fill_and_style,
7
+ # gen_outline_layout_parallel,
8
  gen_outline_layout,
9
+ # gen_poster_content,
10
+ # gen_pptx_code,
11
+ # LLM_direct_generate,
12
+ # new_pipeline,
13
  parse_raw,
14
+ # poster_gen_pipeline,
15
  tree_split_layout
16
  )
Paper2Poster/PosterAgent/gen_outline_layout.py CHANGED
@@ -3,15 +3,16 @@ import os
3
  import json
4
  import copy
5
  import yaml
 
6
  from jinja2 import Environment, StrictUndefined
7
 
8
- from utils.src.utils import ppt_to_images, get_json_from_response
9
 
10
  from camel.models import ModelFactory
11
  from camel.agents import ChatAgent
12
  from camel.messages import BaseMessage
13
 
14
- from utils.pptx_utils import *
15
  from utils.wei_utils import *
16
 
17
  import pickle as pkl
@@ -24,6 +25,23 @@ IMAGE_SCALE_RATIO_MAX = 40
24
  TABLE_SCALE_RATIO_MIN = 100
25
  TABLE_SCALE_RATIO_MAX = 80
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def compute_tp(raw_content_json):
28
  total_length = 0
29
  for section in raw_content_json['sections']:
@@ -426,408 +444,408 @@ def gen_outline_layout_v2(args, actor_config):
426
 
427
  return total_input_token, total_output_token, paper_panels, figure_arrangement
428
 
429
- def gen_outline_layout(args, actor_config, critic_config):
430
- poster_log_path = f'log/{args.model_name}_{args.poster_name}_poster_{args.index}'
431
- if not os.path.exists(poster_log_path):
432
- os.mkdir(poster_log_path)
433
- total_input_token, total_output_token = 0, 0
434
- consumption_log = {
435
- 'outline': [],
436
- 'h1_actor': [],
437
- 'h2_actor': [],
438
- 'h1_critic': [],
439
- 'gen_layout': []
440
- }
441
- jinja_env = Environment(undefined=StrictUndefined)
442
- outline_file_path = f'outlines/{args.model_name}_{args.poster_name}_outline_{args.index}.json'
443
- agent_name = 'poster_planner_new'
444
- agent_init_name = 'layout_agent_init'
445
- agent_new_section_name = 'layout_agent_new_section'
446
- h1_critic_name = 'critic_layout_hierarchy_1'
447
- h2_actor_name = 'actor_layout_hierarchy_2'
448
-
449
- doc_json = json.load(open(f'contents/{args.model_name}_{args.poster_name}_raw_content.json', 'r'))
450
- filtered_table_information = json.load(open(f'images_and_tables/{args.poster_name}_tables_filtered.json', 'r'))
451
- filtered_image_information = json.load(open(f'images_and_tables/{args.poster_name}_images_filtered.json', 'r'))
452
-
453
- with open(f"utils/prompt_templates/{agent_name}.yaml", "r", encoding="utf-8") as f:
454
- planner_config = yaml.safe_load(f)
455
-
456
- with open(f"utils/prompt_templates/{agent_init_name}.yaml", "r", encoding="utf-8") as f:
457
- config_init = yaml.safe_load(f)
458
-
459
- with open(f"utils/prompt_templates/{agent_new_section_name}.yaml", "r", encoding="utf-8") as f:
460
- config_new_section = yaml.safe_load(f)
461
-
462
- with open(f"utils/prompt_templates/{h1_critic_name}.yaml", "r", encoding="utf-8") as f:
463
- config_h1_critic = yaml.safe_load(f)
464
-
465
- with open(f"utils/prompt_templates/{h2_actor_name}.yaml", "r", encoding="utf-8") as f:
466
- config_h2_actor = yaml.safe_load(f)
467
-
468
- planner_model = ModelFactory.create(
469
- model_platform=actor_config['model_platform'],
470
- model_type=actor_config['model_type'],
471
- model_config_dict=actor_config['model_config'],
472
- )
473
-
474
- planner_agent = ChatAgent(
475
- system_message=planner_config['system_prompt'],
476
- model=planner_model,
477
- message_window_size=10,
478
- )
479
-
480
- outline_template = jinja_env.from_string(planner_config["template"])
481
-
482
- planner_jinja_args = {
483
- 'json_content': doc_json,
484
- 'table_information': filtered_table_information,
485
- 'image_information': filtered_image_information,
486
- }
487
-
488
- actor_model = ModelFactory.create(
489
- model_platform=actor_config['model_platform'],
490
- model_type=actor_config['model_type'],
491
- model_config_dict=actor_config['model_config'],
492
- )
493
-
494
- init_actor_sys_msg = config_init['system_prompt']
495
-
496
- init_actor_agent = ChatAgent(
497
- system_message=init_actor_sys_msg,
498
- model=actor_model,
499
- message_window_size=10,
500
- )
501
-
502
- new_section_actor_sys_msg = config_new_section['system_prompt']
503
- new_section_actor_agent = ChatAgent(
504
- system_message=new_section_actor_sys_msg,
505
- model=actor_model,
506
- message_window_size=10,
507
- )
508
-
509
- h1_critic_model = ModelFactory.create(
510
- model_platform=critic_config['model_platform'],
511
- model_type=critic_config['model_type'],
512
- model_config_dict=critic_config['model_config'],
513
- )
514
-
515
- h1_critic_sys_msg = config_h1_critic['system_prompt']
516
-
517
- h1_critic_agent = ChatAgent(
518
- system_message=h1_critic_sys_msg,
519
- model=h1_critic_model,
520
- message_window_size=None,
521
- )
522
-
523
- h1_pos_example = Image.open('assets/h1_example/h1_pos.jpg')
524
- h1_neg_example = Image.open('assets/h1_example/h1_neg.jpg')
525
-
526
- h2_actor_model = ModelFactory.create(
527
- model_platform=actor_config['model_platform'],
528
- model_type=actor_config['model_type'],
529
- model_config_dict=actor_config['model_config'],
530
- )
531
-
532
- h2_actor_sys_msg = config_h2_actor['system_prompt']
533
-
534
- h2_actor_agent = ChatAgent(
535
- system_message=h2_actor_sys_msg,
536
- model=h2_actor_model,
537
- message_window_size=10,
538
- )
539
-
540
- attempt = 0
541
- while True:
542
- print(f'Generating outline attempt {attempt}...')
543
- planner_prompt = outline_template.render(**planner_jinja_args)
544
- planner_agent.reset()
545
- response = planner_agent.step(planner_prompt)
546
- input_token, output_token = account_token(response)
547
- consumption_log['outline'].append((input_token, output_token))
548
- total_input_token += input_token
549
- total_output_token += output_token
550
-
551
- outline = get_json_from_response(response.msgs[0].content)
552
- name_to_hierarchy = get_hierarchy(outline)
553
-
554
- sections = list(outline.keys())
555
- sections = [x for x in sections if x != 'meta']
556
- init_template = jinja_env.from_string(config_init["template"])
557
- new_section_template = jinja_env.from_string(config_new_section["template"])
558
- h1_critic_template = jinja_env.from_string(config_h1_critic["template"])
559
- init_outline = {'meta': outline['meta'], sections[0]: outline[sections[0]]}
560
-
561
- new_outline = outline
562
-
563
- init_jinja_args = {
564
- 'json_outline': init_outline,
565
- 'function_docs': documentation
566
- }
567
-
568
- init_prompt = init_template.render(**init_jinja_args)
569
-
570
- # hierarchy 1 only
571
- outline_location = get_outline_location(outline, subsection=False)
572
- logs = {}
573
- curr_section = sections[0]
574
-
575
- layout_cumulative_input_token = 0
576
- layout_cumulative_output_token = 0
577
-
578
- print('Generating h1 layout...\n')
579
- print(f'Generating h1 layout for section {curr_section}...')
580
- logs[curr_section] = gen_layout(
581
- init_actor_agent,
582
- init_prompt,
583
- args.max_retry,
584
- name_to_hierarchy,
585
- visual_identifier=curr_section
586
- )
587
-
588
- if logs[curr_section][-1]['error'] is not None:
589
- raise ValueError(f'Failed to generate layout for section {curr_section}.')
590
 
591
- layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
592
- layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
593
-
594
- for section_index in range(1, len(sections)):
595
- curr_section = sections[section_index]
596
- print(f'generating h1 layout for section {curr_section}...')
597
- new_section_outline = {curr_section: new_outline[curr_section]}
598
- new_section_jinja_args = {
599
- 'json_outline': new_section_outline,
600
- 'function_docs': documentation
601
- }
602
- new_section_prompt = new_section_template.render(**new_section_jinja_args)
603
-
604
- logs[curr_section] = gen_layout(
605
- new_section_actor_agent,
606
- new_section_prompt,
607
- args.max_retry,
608
- name_to_hierarchy,
609
- visual_identifier=curr_section,
610
- existing_code = logs[sections[section_index - 1]][-1]['concatenated_code']
611
- )
612
- if logs[curr_section][-1]['error'] is not None:
613
- raise ValueError(f'Failed to generate layout for section {curr_section}.')
614
 
615
- layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
616
- layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
617
-
618
- consumption_log['h1_actor'].append((layout_cumulative_input_token, layout_cumulative_output_token))
619
- total_input_token += layout_cumulative_input_token
620
- total_output_token += layout_cumulative_output_token
621
-
622
- h1_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1.pptx'
623
- h2_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2.pptx'
624
-
625
- h1_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1_filled.pptx'
626
- h2_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2_filled.pptx'
627
-
628
- ppt_to_images(h1_path, 'tmp/layout_h1')
629
- ppt_to_images(h2_path, 'tmp/layout_h2')
630
- ppt_to_images(h1_filled_path, 'tmp/layout_h1_filled')
631
- ppt_to_images(h2_filled_path, 'tmp/layout_h2_filled')
632
-
633
- h1_img = Image.open('tmp/layout_h1/slide_0001.jpg')
634
- h2_img = Image.open('tmp/layout_h2/slide_0001.jpg')
635
- h1_filled_img = Image.open('tmp/layout_h1_filled/slide_0001.jpg')
636
- h2_filled_img = Image.open('tmp/layout_h2_filled/slide_0001.jpg')
637
-
638
- h1_critic_msg = BaseMessage.make_user_message(
639
- role_name='User',
640
- content=h1_critic_template.render(),
641
- image_list=[h1_neg_example, h1_pos_example, h1_filled_img]
642
- )
643
-
644
- outline_bbox_dict = {}
645
- for k, v in outline_location.items():
646
- outline_bbox_dict[k] = v['location']
647
-
648
- bbox_check_result = check_bounding_boxes(
649
- outline_bbox_dict,
650
- new_outline['meta']['width'],
651
- new_outline['meta']['height']
652
- )
653
-
654
- if len(bbox_check_result) != 0:
655
- print(bbox_check_result)
656
- attempt += 1
657
- continue
658
-
659
- h1_critic_agent.reset()
660
- response = h1_critic_agent.step(h1_critic_msg)
661
- input_token, output_token = account_token(response)
662
- consumption_log['h1_critic'].append((input_token, output_token))
663
- total_input_token += input_token
664
- total_output_token += output_token
665
- if response.msgs[0].content == 'T':
666
- print('Blank area detected.')
667
- attempt += 1
668
- continue
669
-
670
- break
671
-
672
- outline_bbox_dict = {}
673
- for k, v in outline_location.items():
674
- outline_bbox_dict[k] = v['location']
675
-
676
- # Generate subsection locations
677
- outline_no_sub_locations = copy.deepcopy(new_outline)
678
- if 'meta' in outline_no_sub_locations:
679
- outline_no_sub_locations.pop('meta')
680
-
681
- for k, v in outline_no_sub_locations.items():
682
- if 'subsections' in v:
683
- subsections = v['subsections']
684
- for k_sub, v_sub in subsections.items():
685
- del v_sub['location']
686
- del v_sub['name']
687
-
688
- h2_actor_template = jinja_env.from_string(config_h2_actor["template"])
689
-
690
- h2_cumulative_input_token = 0
691
- h2_cumulative_output_token = 0
692
 
693
- for section in sections:
694
- while True:
695
- print(f'generating h2 for section {section}...')
696
- section_outline = {section: outline_no_sub_locations[section]}
697
- section_jinja_args = {
698
- 'section_outline': json.dumps(section_outline, indent=4),
699
- }
700
-
701
- section_prompt = h2_actor_template.render(**section_jinja_args)
702
-
703
- h2_actor_agent.reset()
704
- response = h2_actor_agent.step(section_prompt)
705
- input_token, output_token = account_token(response)
706
- h2_cumulative_input_token += input_token
707
- h2_cumulative_output_token += output_token
708
- subsection_location = get_json_from_response(response.msgs[0].content)
709
-
710
- sec_bbox = outline_no_sub_locations[section]['location']
711
- subsection_location_dict = {}
712
- for k, v in subsection_location.items():
713
- subsection_location_dict[k] = {
714
- 'left': v['location'][0],
715
- 'top': v['location'][1],
716
- 'width': v['location'][2],
717
- 'height': v['location'][3]
718
- }
719
-
720
- is_valid, revised = validate_and_adjust_subsections(sec_bbox, subsection_location_dict)
721
- if not is_valid:
722
- is_valid, revised = validate_and_adjust_subsections(sec_bbox, revised)
723
- assert is_valid, "Failed to adjust subsections to fit section"
724
- outline_no_sub_locations = fill_location(outline_no_sub_locations, section, revised)
725
- else:
726
- outline_no_sub_locations = fill_location(outline_no_sub_locations, section, subsection_location)
727
- break
728
-
729
- consumption_log['h2_actor'].append((h2_cumulative_input_token, h2_cumulative_output_token))
730
- total_input_token += h2_cumulative_input_token
731
- total_output_token += h2_cumulative_output_token
732
-
733
- outline_no_sub_locations['meta'] = outline['meta']
734
- outline_no_sub_locations_with_name = recover_name_and_location(outline_no_sub_locations, new_outline)
735
- new_outline = outline_no_sub_locations_with_name
736
-
737
- ### Outline finalized, actually generate layout
738
-
739
- logs = {}
740
-
741
- gen_layout_cumulative_input_token = 0
742
- gen_layout_cumulative_output_token = 0
743
- curr_section = sections[0]
744
-
745
- init_outline = {'meta': new_outline['meta'], sections[0]: new_outline[sections[0]]}
746
-
747
- init_jinja_args = {
748
- 'json_outline': init_outline,
749
- 'function_docs': documentation
750
- }
751
-
752
- init_prompt = init_template.render(**init_jinja_args)
753
- logs[curr_section] = gen_layout(
754
- init_actor_agent,
755
- init_prompt,
756
- args.max_retry,
757
- name_to_hierarchy,
758
- visual_identifier=curr_section
759
- )
760
-
761
- if logs[curr_section][-1]['error'] is not None:
762
- raise ValueError(f'Failed to generate layout for section {curr_section}.')
763
-
764
- gen_layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
765
- gen_layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
766
-
767
- for section_index in range(1, len(sections)):
768
- curr_section = sections[section_index]
769
- print(f'generating section {curr_section}...')
770
- new_section_outline = {curr_section: new_outline[curr_section]}
771
- new_section_jinja_args = {
772
- 'json_outline': new_section_outline,
773
- 'function_docs': documentation
774
- }
775
- new_section_prompt = new_section_template.render(**new_section_jinja_args)
776
-
777
- logs[curr_section] = gen_layout(
778
- new_section_actor_agent,
779
- new_section_prompt,
780
- args.max_retry,
781
- name_to_hierarchy,
782
- visual_identifier=curr_section,
783
- existing_code = logs[sections[section_index - 1]][-1]['concatenated_code']
784
- )
785
- if logs[curr_section][-1]['error'] is not None:
786
- raise ValueError(f'Failed to generate layout for section {curr_section}.')
787
 
788
- gen_layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
789
- gen_layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
790
-
791
- consumption_log['gen_layout'].append((gen_layout_cumulative_input_token, gen_layout_cumulative_output_token))
792
- total_input_token += gen_layout_cumulative_input_token
793
- total_output_token += gen_layout_cumulative_output_token
794
-
795
- h1_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1.pptx'
796
- h2_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2.pptx'
797
-
798
- h1_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1_filled.pptx'
799
- h2_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2_filled.pptx'
800
-
801
- ppt_to_images(h1_path, f'{poster_log_path}/layout_h1')
802
- ppt_to_images(h2_path, f'{poster_log_path}/layout_h2')
803
- ppt_to_images(h1_filled_path, f'{poster_log_path}/layout_h1_filled')
804
- ppt_to_images(h2_filled_path, f'{poster_log_path}/layout_h2_filled')
805
-
806
- h1_img = Image.open(f'{poster_log_path}/layout_h1/slide_0001.jpg')
807
- h2_img = Image.open(f'{poster_log_path}/layout_h2/slide_0001.jpg')
808
- h1_filled_img = Image.open(f'{poster_log_path}/layout_h1_filled/slide_0001.jpg')
809
- h2_filled_img = Image.open(f'{poster_log_path}/layout_h2_filled/slide_0001.jpg')
810
-
811
- ckpt = {
812
- 'logs': logs,
813
- 'outline': new_outline,
814
- 'name_to_hierarchy': name_to_hierarchy,
815
- 'consumption_log': consumption_log,
816
- 'total_input_token': total_input_token,
817
- 'total_output_token': total_output_token,
818
- }
819
-
820
- with open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'wb') as f:
821
- pkl.dump(ckpt, f)
822
-
823
- json.dump(
824
- new_outline,
825
- open(outline_file_path, "w"),
826
- ensure_ascii=False,
827
- indent=4,
828
- )
829
-
830
- return total_input_token, total_output_token
831
 
832
  if __name__ == '__main__':
833
  parser = argparse.ArgumentParser()
 
3
  import json
4
  import copy
5
  import yaml
6
+ import json_repair
7
  from jinja2 import Environment, StrictUndefined
8
 
9
+ # from utils.src.utils import ppt_to_images, get_json_from_response
10
 
11
  from camel.models import ModelFactory
12
  from camel.agents import ChatAgent
13
  from camel.messages import BaseMessage
14
 
15
+ # from utils.pptx_utils import *
16
  from utils.wei_utils import *
17
 
18
  import pickle as pkl
 
25
  TABLE_SCALE_RATIO_MIN = 100
26
  TABLE_SCALE_RATIO_MAX = 80
27
 
28
def get_json_from_response(raw_response: str):
    """Extract and parse the JSON payload contained in a model response.

    The response is stripped of surrounding whitespace. If it contains a
    ```json fenced block, the text inside the *last* such block is parsed;
    otherwise the whole stripped response is parsed. Parsing is delegated
    to ``json_repair.loads``.

    Args:
        raw_response: Raw text returned by the model.

    Returns:
        Whatever ``json_repair.loads`` produces for the selected text.

    Raises:
        RuntimeError: If parsing raises; the original exception is kept as
            the second element of ``args`` and as ``__cause__``.
    """
    fence = "```json"
    text = raw_response.strip()
    start = text.rfind(fence)
    try:
        if start == -1:
            # No ```json fence at all: treat the entire response as JSON.
            payload = text
        else:
            body_start = start + len(fence)
            end = text.rfind("```")
            # rfind("```") always matches at least the opening fence, so
            # end == start means the block was never closed (e.g. the output
            # was truncated). Slicing up to `end` would then yield an empty
            # string and drop the payload; take the rest of the text instead.
            if end > start:
                payload = text[body_start:end]
            else:
                payload = text[body_start:]
            payload = payload.strip()
        return json_repair.loads(payload)
    except Exception as e:
        raise RuntimeError("Failed to parse JSON from response", e) from e
39
+
40
def account_token(response):
    """Return the ``(input_token, output_token)`` counts of a response.

    Reads ``response.info['usage']['prompt_tokens']`` and
    ``response.info['usage']['completion_tokens']``. If the usage record or
    either counter is missing or ``None``, that count is reported as 0 so
    that token bookkeeping does not abort the caller.

    Args:
        response: Object exposing a subscriptable ``info`` attribute.

    Returns:
        A ``(prompt_tokens, completion_tokens)`` tuple.
    """
    try:
        usage = response.info['usage']
    except (KeyError, TypeError):
        usage = None

    counts = []
    for key in ('prompt_tokens', 'completion_tokens'):
        try:
            value = usage[key]
        except (KeyError, TypeError):
            # TypeError covers usage being None (not subscriptable).
            value = None
        counts.append(0 if value is None else value)

    input_token, output_token = counts
    return input_token, output_token
45
  def compute_tp(raw_content_json):
46
  total_length = 0
47
  for section in raw_content_json['sections']:
 
444
 
445
  return total_input_token, total_output_token, paper_panels, figure_arrangement
446
 
447
+ # def gen_outline_layout(args, actor_config, critic_config):
448
+ # poster_log_path = f'log/{args.model_name}_{args.poster_name}_poster_{args.index}'
449
+ # if not os.path.exists(poster_log_path):
450
+ # os.mkdir(poster_log_path)
451
+ # total_input_token, total_output_token = 0, 0
452
+ # consumption_log = {
453
+ # 'outline': [],
454
+ # 'h1_actor': [],
455
+ # 'h2_actor': [],
456
+ # 'h1_critic': [],
457
+ # 'gen_layout': []
458
+ # }
459
+ # jinja_env = Environment(undefined=StrictUndefined)
460
+ # outline_file_path = f'outlines/{args.model_name}_{args.poster_name}_outline_{args.index}.json'
461
+ # agent_name = 'poster_planner_new'
462
+ # agent_init_name = 'layout_agent_init'
463
+ # agent_new_section_name = 'layout_agent_new_section'
464
+ # h1_critic_name = 'critic_layout_hierarchy_1'
465
+ # h2_actor_name = 'actor_layout_hierarchy_2'
466
+
467
+ # doc_json = json.load(open(f'contents/{args.model_name}_{args.poster_name}_raw_content.json', 'r'))
468
+ # filtered_table_information = json.load(open(f'images_and_tables/{args.poster_name}_tables_filtered.json', 'r'))
469
+ # filtered_image_information = json.load(open(f'images_and_tables/{args.poster_name}_images_filtered.json', 'r'))
470
+
471
+ # with open(f"utils/prompt_templates/{agent_name}.yaml", "r", encoding="utf-8") as f:
472
+ # planner_config = yaml.safe_load(f)
473
+
474
+ # with open(f"utils/prompt_templates/{agent_init_name}.yaml", "r", encoding="utf-8") as f:
475
+ # config_init = yaml.safe_load(f)
476
+
477
+ # with open(f"utils/prompt_templates/{agent_new_section_name}.yaml", "r", encoding="utf-8") as f:
478
+ # config_new_section = yaml.safe_load(f)
479
+
480
+ # with open(f"utils/prompt_templates/{h1_critic_name}.yaml", "r", encoding="utf-8") as f:
481
+ # config_h1_critic = yaml.safe_load(f)
482
+
483
+ # with open(f"utils/prompt_templates/{h2_actor_name}.yaml", "r", encoding="utf-8") as f:
484
+ # config_h2_actor = yaml.safe_load(f)
485
+
486
+ # planner_model = ModelFactory.create(
487
+ # model_platform=actor_config['model_platform'],
488
+ # model_type=actor_config['model_type'],
489
+ # model_config_dict=actor_config['model_config'],
490
+ # )
491
+
492
+ # planner_agent = ChatAgent(
493
+ # system_message=planner_config['system_prompt'],
494
+ # model=planner_model,
495
+ # message_window_size=10,
496
+ # )
497
+
498
+ # outline_template = jinja_env.from_string(planner_config["template"])
499
+
500
+ # planner_jinja_args = {
501
+ # 'json_content': doc_json,
502
+ # 'table_information': filtered_table_information,
503
+ # 'image_information': filtered_image_information,
504
+ # }
505
+
506
+ # actor_model = ModelFactory.create(
507
+ # model_platform=actor_config['model_platform'],
508
+ # model_type=actor_config['model_type'],
509
+ # model_config_dict=actor_config['model_config'],
510
+ # )
511
+
512
+ # init_actor_sys_msg = config_init['system_prompt']
513
+
514
+ # init_actor_agent = ChatAgent(
515
+ # system_message=init_actor_sys_msg,
516
+ # model=actor_model,
517
+ # message_window_size=10,
518
+ # )
519
+
520
+ # new_section_actor_sys_msg = config_new_section['system_prompt']
521
+ # new_section_actor_agent = ChatAgent(
522
+ # system_message=new_section_actor_sys_msg,
523
+ # model=actor_model,
524
+ # message_window_size=10,
525
+ # )
526
+
527
+ # h1_critic_model = ModelFactory.create(
528
+ # model_platform=critic_config['model_platform'],
529
+ # model_type=critic_config['model_type'],
530
+ # model_config_dict=critic_config['model_config'],
531
+ # )
532
+
533
+ # h1_critic_sys_msg = config_h1_critic['system_prompt']
534
+
535
+ # h1_critic_agent = ChatAgent(
536
+ # system_message=h1_critic_sys_msg,
537
+ # model=h1_critic_model,
538
+ # message_window_size=None,
539
+ # )
540
+
541
+ # h1_pos_example = Image.open('assets/h1_example/h1_pos.jpg')
542
+ # h1_neg_example = Image.open('assets/h1_example/h1_neg.jpg')
543
+
544
+ # h2_actor_model = ModelFactory.create(
545
+ # model_platform=actor_config['model_platform'],
546
+ # model_type=actor_config['model_type'],
547
+ # model_config_dict=actor_config['model_config'],
548
+ # )
549
+
550
+ # h2_actor_sys_msg = config_h2_actor['system_prompt']
551
+
552
+ # h2_actor_agent = ChatAgent(
553
+ # system_message=h2_actor_sys_msg,
554
+ # model=h2_actor_model,
555
+ # message_window_size=10,
556
+ # )
557
+
558
+ # attempt = 0
559
+ # while True:
560
+ # print(f'Generating outline attempt {attempt}...')
561
+ # planner_prompt = outline_template.render(**planner_jinja_args)
562
+ # planner_agent.reset()
563
+ # response = planner_agent.step(planner_prompt)
564
+ # input_token, output_token = account_token(response)
565
+ # consumption_log['outline'].append((input_token, output_token))
566
+ # total_input_token += input_token
567
+ # total_output_token += output_token
568
+
569
+ # outline = get_json_from_response(response.msgs[0].content)
570
+ # name_to_hierarchy = get_hierarchy(outline)
571
+
572
+ # sections = list(outline.keys())
573
+ # sections = [x for x in sections if x != 'meta']
574
+ # init_template = jinja_env.from_string(config_init["template"])
575
+ # new_section_template = jinja_env.from_string(config_new_section["template"])
576
+ # h1_critic_template = jinja_env.from_string(config_h1_critic["template"])
577
+ # init_outline = {'meta': outline['meta'], sections[0]: outline[sections[0]]}
578
+
579
+ # new_outline = outline
580
+
581
+ # init_jinja_args = {
582
+ # 'json_outline': init_outline,
583
+ # 'function_docs': documentation
584
+ # }
585
+
586
+ # init_prompt = init_template.render(**init_jinja_args)
587
+
588
+ # # hierarchy 1 only
589
+ # outline_location = get_outline_location(outline, subsection=False)
590
+ # logs = {}
591
+ # curr_section = sections[0]
592
+
593
+ # layout_cumulative_input_token = 0
594
+ # layout_cumulative_output_token = 0
595
+
596
+ # print('Generating h1 layout...\n')
597
+ # print(f'Generating h1 layout for section {curr_section}...')
598
+ # logs[curr_section] = gen_layout(
599
+ # init_actor_agent,
600
+ # init_prompt,
601
+ # args.max_retry,
602
+ # name_to_hierarchy,
603
+ # visual_identifier=curr_section
604
+ # )
605
+
606
+ # if logs[curr_section][-1]['error'] is not None:
607
+ # raise ValueError(f'Failed to generate layout for section {curr_section}.')
608
 
609
+ # layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
610
+ # layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
611
+
612
+ # for section_index in range(1, len(sections)):
613
+ # curr_section = sections[section_index]
614
+ # print(f'generating h1 layout for section {curr_section}...')
615
+ # new_section_outline = {curr_section: new_outline[curr_section]}
616
+ # new_section_jinja_args = {
617
+ # 'json_outline': new_section_outline,
618
+ # 'function_docs': documentation
619
+ # }
620
+ # new_section_prompt = new_section_template.render(**new_section_jinja_args)
621
+
622
+ # logs[curr_section] = gen_layout(
623
+ # new_section_actor_agent,
624
+ # new_section_prompt,
625
+ # args.max_retry,
626
+ # name_to_hierarchy,
627
+ # visual_identifier=curr_section,
628
+ # existing_code = logs[sections[section_index - 1]][-1]['concatenated_code']
629
+ # )
630
+ # if logs[curr_section][-1]['error'] is not None:
631
+ # raise ValueError(f'Failed to generate layout for section {curr_section}.')
632
 
633
+ # layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
634
+ # layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
635
+
636
+ # consumption_log['h1_actor'].append((layout_cumulative_input_token, layout_cumulative_output_token))
637
+ # total_input_token += layout_cumulative_input_token
638
+ # total_output_token += layout_cumulative_output_token
639
+
640
+ # h1_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1.pptx'
641
+ # h2_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2.pptx'
642
+
643
+ # h1_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1_filled.pptx'
644
+ # h2_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2_filled.pptx'
645
+
646
+ # ppt_to_images(h1_path, 'tmp/layout_h1')
647
+ # ppt_to_images(h2_path, 'tmp/layout_h2')
648
+ # ppt_to_images(h1_filled_path, 'tmp/layout_h1_filled')
649
+ # ppt_to_images(h2_filled_path, 'tmp/layout_h2_filled')
650
+
651
+ # h1_img = Image.open('tmp/layout_h1/slide_0001.jpg')
652
+ # h2_img = Image.open('tmp/layout_h2/slide_0001.jpg')
653
+ # h1_filled_img = Image.open('tmp/layout_h1_filled/slide_0001.jpg')
654
+ # h2_filled_img = Image.open('tmp/layout_h2_filled/slide_0001.jpg')
655
+
656
+ # h1_critic_msg = BaseMessage.make_user_message(
657
+ # role_name='User',
658
+ # content=h1_critic_template.render(),
659
+ # image_list=[h1_neg_example, h1_pos_example, h1_filled_img]
660
+ # )
661
+
662
+ # outline_bbox_dict = {}
663
+ # for k, v in outline_location.items():
664
+ # outline_bbox_dict[k] = v['location']
665
+
666
+ # bbox_check_result = check_bounding_boxes(
667
+ # outline_bbox_dict,
668
+ # new_outline['meta']['width'],
669
+ # new_outline['meta']['height']
670
+ # )
671
+
672
+ # if len(bbox_check_result) != 0:
673
+ # print(bbox_check_result)
674
+ # attempt += 1
675
+ # continue
676
+
677
+ # h1_critic_agent.reset()
678
+ # response = h1_critic_agent.step(h1_critic_msg)
679
+ # input_token, output_token = account_token(response)
680
+ # consumption_log['h1_critic'].append((input_token, output_token))
681
+ # total_input_token += input_token
682
+ # total_output_token += output_token
683
+ # if response.msgs[0].content == 'T':
684
+ # print('Blank area detected.')
685
+ # attempt += 1
686
+ # continue
687
+
688
+ # break
689
+
690
+ # outline_bbox_dict = {}
691
+ # for k, v in outline_location.items():
692
+ # outline_bbox_dict[k] = v['location']
693
+
694
+ # # Generate subsection locations
695
+ # outline_no_sub_locations = copy.deepcopy(new_outline)
696
+ # if 'meta' in outline_no_sub_locations:
697
+ # outline_no_sub_locations.pop('meta')
698
+
699
+ # for k, v in outline_no_sub_locations.items():
700
+ # if 'subsections' in v:
701
+ # subsections = v['subsections']
702
+ # for k_sub, v_sub in subsections.items():
703
+ # del v_sub['location']
704
+ # del v_sub['name']
705
+
706
+ # h2_actor_template = jinja_env.from_string(config_h2_actor["template"])
707
+
708
+ # h2_cumulative_input_token = 0
709
+ # h2_cumulative_output_token = 0
710
 
711
+ # for section in sections:
712
+ # while True:
713
+ # print(f'generating h2 for section {section}...')
714
+ # section_outline = {section: outline_no_sub_locations[section]}
715
+ # section_jinja_args = {
716
+ # 'section_outline': json.dumps(section_outline, indent=4),
717
+ # }
718
+
719
+ # section_prompt = h2_actor_template.render(**section_jinja_args)
720
+
721
+ # h2_actor_agent.reset()
722
+ # response = h2_actor_agent.step(section_prompt)
723
+ # input_token, output_token = account_token(response)
724
+ # h2_cumulative_input_token += input_token
725
+ # h2_cumulative_output_token += output_token
726
+ # subsection_location = get_json_from_response(response.msgs[0].content)
727
+
728
+ # sec_bbox = outline_no_sub_locations[section]['location']
729
+ # subsection_location_dict = {}
730
+ # for k, v in subsection_location.items():
731
+ # subsection_location_dict[k] = {
732
+ # 'left': v['location'][0],
733
+ # 'top': v['location'][1],
734
+ # 'width': v['location'][2],
735
+ # 'height': v['location'][3]
736
+ # }
737
+
738
+ # is_valid, revised = validate_and_adjust_subsections(sec_bbox, subsection_location_dict)
739
+ # if not is_valid:
740
+ # is_valid, revised = validate_and_adjust_subsections(sec_bbox, revised)
741
+ # assert is_valid, "Failed to adjust subsections to fit section"
742
+ # outline_no_sub_locations = fill_location(outline_no_sub_locations, section, revised)
743
+ # else:
744
+ # outline_no_sub_locations = fill_location(outline_no_sub_locations, section, subsection_location)
745
+ # break
746
+
747
+ # consumption_log['h2_actor'].append((h2_cumulative_input_token, h2_cumulative_output_token))
748
+ # total_input_token += h2_cumulative_input_token
749
+ # total_output_token += h2_cumulative_output_token
750
+
751
+ # outline_no_sub_locations['meta'] = outline['meta']
752
+ # outline_no_sub_locations_with_name = recover_name_and_location(outline_no_sub_locations, new_outline)
753
+ # new_outline = outline_no_sub_locations_with_name
754
+
755
+ # ### Outline finalized, actually generate layout
756
+
757
+ # logs = {}
758
+
759
+ # gen_layout_cumulative_input_token = 0
760
+ # gen_layout_cumulative_output_token = 0
761
+ # curr_section = sections[0]
762
+
763
+ # init_outline = {'meta': new_outline['meta'], sections[0]: new_outline[sections[0]]}
764
+
765
+ # init_jinja_args = {
766
+ # 'json_outline': init_outline,
767
+ # 'function_docs': documentation
768
+ # }
769
+
770
+ # init_prompt = init_template.render(**init_jinja_args)
771
+ # logs[curr_section] = gen_layout(
772
+ # init_actor_agent,
773
+ # init_prompt,
774
+ # args.max_retry,
775
+ # name_to_hierarchy,
776
+ # visual_identifier=curr_section
777
+ # )
778
+
779
+ # if logs[curr_section][-1]['error'] is not None:
780
+ # raise ValueError(f'Failed to generate layout for section {curr_section}.')
781
+
782
+ # gen_layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
783
+ # gen_layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
784
+
785
+ # for section_index in range(1, len(sections)):
786
+ # curr_section = sections[section_index]
787
+ # print(f'generating section {curr_section}...')
788
+ # new_section_outline = {curr_section: new_outline[curr_section]}
789
+ # new_section_jinja_args = {
790
+ # 'json_outline': new_section_outline,
791
+ # 'function_docs': documentation
792
+ # }
793
+ # new_section_prompt = new_section_template.render(**new_section_jinja_args)
794
+
795
+ # logs[curr_section] = gen_layout(
796
+ # new_section_actor_agent,
797
+ # new_section_prompt,
798
+ # args.max_retry,
799
+ # name_to_hierarchy,
800
+ # visual_identifier=curr_section,
801
+ # existing_code = logs[sections[section_index - 1]][-1]['concatenated_code']
802
+ # )
803
+ # if logs[curr_section][-1]['error'] is not None:
804
+ # raise ValueError(f'Failed to generate layout for section {curr_section}.')
805
 
806
+ # gen_layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
807
+ # gen_layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
808
+
809
+ # consumption_log['gen_layout'].append((gen_layout_cumulative_input_token, gen_layout_cumulative_output_token))
810
+ # total_input_token += gen_layout_cumulative_input_token
811
+ # total_output_token += gen_layout_cumulative_output_token
812
+
813
+ # h1_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1.pptx'
814
+ # h2_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2.pptx'
815
+
816
+ # h1_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1_filled.pptx'
817
+ # h2_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2_filled.pptx'
818
+
819
+ # ppt_to_images(h1_path, f'{poster_log_path}/layout_h1')
820
+ # ppt_to_images(h2_path, f'{poster_log_path}/layout_h2')
821
+ # ppt_to_images(h1_filled_path, f'{poster_log_path}/layout_h1_filled')
822
+ # ppt_to_images(h2_filled_path, f'{poster_log_path}/layout_h2_filled')
823
+
824
+ # h1_img = Image.open(f'{poster_log_path}/layout_h1/slide_0001.jpg')
825
+ # h2_img = Image.open(f'{poster_log_path}/layout_h2/slide_0001.jpg')
826
+ # h1_filled_img = Image.open(f'{poster_log_path}/layout_h1_filled/slide_0001.jpg')
827
+ # h2_filled_img = Image.open(f'{poster_log_path}/layout_h2_filled/slide_0001.jpg')
828
+
829
+ # ckpt = {
830
+ # 'logs': logs,
831
+ # 'outline': new_outline,
832
+ # 'name_to_hierarchy': name_to_hierarchy,
833
+ # 'consumption_log': consumption_log,
834
+ # 'total_input_token': total_input_token,
835
+ # 'total_output_token': total_output_token,
836
+ # }
837
+
838
+ # with open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'wb') as f:
839
+ # pkl.dump(ckpt, f)
840
+
841
+ # json.dump(
842
+ # new_outline,
843
+ # open(outline_file_path, "w"),
844
+ # ensure_ascii=False,
845
+ # indent=4,
846
+ # )
847
+
848
+ # return total_input_token, total_output_token
849
 
850
  if __name__ == '__main__':
851
  parser = argparse.ArgumentParser()
Paper2Poster/PosterAgent/new_pipeline.py CHANGED
@@ -2,15 +2,15 @@ import os
2
  print("Initializing...")
3
  from PosterAgent.parse_raw import parse_raw, gen_image_and_table
4
  from PosterAgent.gen_outline_layout import filter_image_table, gen_outline_layout_v2
5
- from utils.wei_utils import get_agent_config, utils_functions, run_code, scale_to_target_area, char_capacity
6
- from PosterAgent.tree_split_layout import main_train, main_inference, get_arrangments_in_inches, split_textbox, to_inches
7
  # from PosterAgent.gen_pptx_code import generate_poster_code
8
  # from utils.src.utils import ppt_to_images
9
  # from PosterAgent.gen_poster_content import gen_bullet_point_content
10
- from utils.ablation_utils import no_tree_get_layout
11
 
12
  # Import refactored utilities
13
- from utils.logo_utils import LogoManager, add_logos_to_poster_code
14
  # from utils.config_utils import (
15
  # load_poster_yaml_config, extract_font_sizes, extract_colors,
16
  # extract_vertical_alignment, extract_section_title_symbol, normalize_config_values
@@ -32,6 +32,14 @@ import time
32
  import shutil
33
 
34
  units_per_inch = 25
 
 
 
 
 
 
 
 
35
 
36
  if __name__ == '__main__':
37
 
 
2
  print("Initializing...")
3
  from PosterAgent.parse_raw import parse_raw, gen_image_and_table
4
  from PosterAgent.gen_outline_layout import filter_image_table, gen_outline_layout_v2
5
+ from utils.wei_utils import get_agent_config, scale_to_target_area
6
+ # from PosterAgent.tree_split_layout import main_train, main_inference, get_arrangments_in_inches, split_textbox, to_inches
7
  # from PosterAgent.gen_pptx_code import generate_poster_code
8
  # from utils.src.utils import ppt_to_images
9
  # from PosterAgent.gen_poster_content import gen_bullet_point_content
10
+ # from utils.ablation_utils import no_tree_get_layout
11
 
12
  # Import refactored utilities
13
+ # from utils.logo_utils import LogoManager, add_logos_to_poster_code
14
  # from utils.config_utils import (
15
  # load_poster_yaml_config, extract_font_sizes, extract_colors,
16
  # extract_vertical_alignment, extract_section_title_symbol, normalize_config_values
 
32
  import shutil
33
 
34
  units_per_inch = 25
35
+ def to_inches(value_in_units, units_per_inch=72):
36
+ """
37
+ Convert a single coordinate or dimension from 'units' to inches.
38
+ For example, if your units are 'points' (72 points = 1 inch),
39
+ then units_per_inch=72.
40
+ If your units are 'pixels' at 96 DPI, then units_per_inch=96.
41
+ """
42
+ return value_in_units / units_per_inch
43
 
44
  if __name__ == '__main__':
45
 
Paper2Poster/PosterAgent/parse_raw.py CHANGED
@@ -27,7 +27,7 @@ import torch
27
  from jinja2 import Template
28
  import re
29
  import argparse
30
-
31
  load_dotenv()
32
  IMAGE_RESOLUTION_SCALE = 5.0
33
 
@@ -41,70 +41,103 @@ doc_converter = DocumentConverter(
41
  InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
42
  }
43
  )
 
 
 
 
 
44
 
45
  @retry(stop=stop_after_attempt(5))
46
  def parse_raw(args, actor_config, version=2):
47
  raw_source = args.poster_path
48
  markdown_clean_pattern = re.compile(r"<!--[\s\S]*?-->")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- raw_result = doc_converter.convert(raw_source)
51
-
52
- raw_markdown = raw_result.document.export_to_markdown()
53
- text_content = markdown_clean_pattern.sub("", raw_markdown)
54
-
55
- if len(text_content) < 500:
56
- print('\nParsing with docling failed, using marker instead\n')
57
- parser_model = create_model_dict(device='cuda', dtype=torch.float16)
58
- text_content, rendered = parse_pdf(raw_source, model_lst=parser_model, save_file=False)
59
-
60
- if version == 1:
61
- template = Template(open("utils/prompts/gen_poster_raw_content.txt").read())
62
- elif version == 2:
63
- print('Using v2 prompt template')
64
- template = Template(open("utils/prompts/gen_poster_raw_content_v2.txt").read())
65
-
66
- if args.model_name_t.startswith('vllm_qwen'):
67
- actor_model = ModelFactory.create(
68
- model_platform=actor_config['model_platform'],
69
- model_type=actor_config['model_type'],
70
- model_config_dict=actor_config['model_config'],
71
- url=actor_config['url'],
72
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  else:
74
- actor_model = ModelFactory.create(
75
- model_platform=actor_config['model_platform'],
76
- model_type=actor_config['model_type'],
77
- model_config_dict=actor_config['model_config'],
 
 
 
 
78
  )
79
-
80
- actor_sys_msg = 'You are the author of the paper, and you will create a poster for the paper.'
81
-
82
- actor_agent = ChatAgent(
83
- system_message=actor_sys_msg,
84
- model=actor_model,
85
- message_window_size=10,
86
- token_limit=actor_config.get('token_limit', None)
87
- )
88
-
89
- while True:
90
- prompt = template.render(
91
- markdown_document=text_content,
92
- )
93
- actor_agent.reset()
94
- response = actor_agent.step(prompt)
95
- input_token, output_token = account_token(response)
96
-
97
- content_json = get_json_from_response(response.msgs[0].content)
98
-
99
- if len(content_json) > 0:
100
- break
101
- print('Error: Empty response, retrying...')
102
- if args.model_name_t.startswith('vllm_qwen'):
103
- text_content = text_content[:80000]
104
-
105
- if len(content_json['sections']) > 9:
106
- # First 2 sections + randomly select 5 sections + last 2 sections
107
- selected_sections = content_json['sections'][:2] + random.sample(content_json['sections'][2:-2], 5) + content_json['sections'][-2:]
108
  content_json['sections'] = selected_sections
109
 
110
  has_title = False
 
27
  from jinja2 import Template
28
  import re
29
  import argparse
30
+ import os
31
  load_dotenv()
32
  IMAGE_RESOLUTION_SCALE = 5.0
33
 
 
41
  InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
42
  }
43
  )
44
+ def account_token(response):
45
+ input_token = response.info['usage']['prompt_tokens']
46
+ output_token = response.info['usage']['completion_tokens']
47
+
48
+ return input_token, output_token
49
 
50
  @retry(stop=stop_after_attempt(5))
51
  def parse_raw(args, actor_config, version=2):
52
  raw_source = args.poster_path
53
  markdown_clean_pattern = re.compile(r"<!--[\s\S]*?-->")
54
+ print(f'\nParsing raw content from {raw_source}...\n')
55
+ try:
56
+ raw_result = doc_converter.convert(raw_source)
57
+ print('✅ PDF converted to document format.')
58
+ raw_markdown = raw_result.document.export_to_markdown()
59
+ text_content = markdown_clean_pattern.sub("", raw_markdown)
60
+ print(f'Extracted {len(text_content)} characters from the document.')
61
+ if len(text_content) < 500:
62
+ print('\nParsing with docling failed, using marker instead\n')
63
+ parser_model = create_model_dict(device='cuda', dtype=torch.float16)
64
+ text_content, rendered = parse_pdf(raw_source, model_lst=parser_model, save_file=False)
65
+ except Exception as e:
66
+ print(f'❌ PDF parsing failed: {e}')
67
+ raise e
68
+
69
+ # Load prompt template safely
70
+ try:
71
+ if version == 1:
72
+ template = Template(open("utils/prompts/gen_poster_raw_content.txt").read())
73
+ elif version == 2:
74
+ print('Using v2 prompt template')
75
+ template = Template(open("utils/prompts/gen_poster_raw_content_v2.txt").read())
76
+ else:
77
+ raise ValueError("Invalid version number.")
78
+ except Exception as e:
79
+ print(f'❌ Failed to load prompt template: {e}')
80
+ raise e
81
+
82
+ # Initialize actor model
83
+ try:
84
+ if args.model_name_t.startswith('vllm_qwen'):
85
+ actor_model = ModelFactory.create(
86
+ model_platform=actor_config['model_platform'],
87
+ model_type=actor_config['model_type'],
88
+ model_config_dict=actor_config['model_config'],
89
+ url=actor_config['url'],
90
+ )
91
+ else:
92
+ actor_model = ModelFactory.create(
93
+ model_platform=actor_config['model_platform'],
94
+ model_type=actor_config['model_type'],
95
+ model_config_dict=actor_config['model_config'],
96
+ )
97
 
98
+ actor_sys_msg = 'You are the author of the paper, and you will create a poster for the paper.'
99
+ actor_agent = ChatAgent(
100
+ system_message=actor_sys_msg,
101
+ model=actor_model,
102
+ message_window_size=10,
103
+ token_limit=actor_config.get('token_limit', None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  )
105
+ except Exception as e:
106
+ print(f'❌ Failed to initialize actor model: {e}')
107
+ raise e
108
+
109
+ # === main conversation loop ===
110
+ max_retry = 5
111
+ content_json = {}
112
+ for attempt in range(max_retry):
113
+ try:
114
+ print(f'\n🌀 Generating poster content... (Attempt {attempt+1}/{max_retry})')
115
+ prompt = template.render(markdown_document=text_content)
116
+ actor_agent.reset()
117
+ response = actor_agent.step(prompt)
118
+ input_token, output_token = account_token(response)
119
+
120
+ content_json = get_json_from_response(response.msgs[0].content)
121
+ if len(content_json) > 0:
122
+ print('✅ Successfully parsed JSON content.')
123
+ break
124
+ else:
125
+ print('⚠️ Empty JSON response, retrying...')
126
+ if args.model_name_t.startswith('vllm_qwen'):
127
+ text_content = text_content[:80000]
128
+ except Exception as e:
129
+ print(f'❌ Error in actor_agent loop: {e}')
130
+ # short delay could be added if needed
131
  else:
132
+ raise RuntimeError("Failed to get valid JSON content after multiple attempts.")
133
+
134
+ # Trim sections if too many
135
+ if len(content_json.get('sections', [])) > 9:
136
+ selected_sections = (
137
+ content_json['sections'][:2]
138
+ + random.sample(content_json['sections'][2:-2], 5)
139
+ + content_json['sections'][-2:]
140
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  content_json['sections'] = selected_sections
142
 
143
  has_title = False
Paper2Poster/utils/__init__.py CHANGED
@@ -1 +1 @@
1
- from . import poster_eval_utils, pptx_utils, wei_utils, critic_utils, ablation_utils, src
 
1
+ from . import wei_utils, src
Paper2Poster/utils/src/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  from . import (
2
  model_utils,
3
- presentation,
4
- utils
5
  )
 
1
  from . import (
2
  model_utils,
3
+ # presentation,
4
+ # utils
5
  )
Paper2Poster/utils/src/model_utils.py CHANGED
@@ -14,7 +14,8 @@ from PIL import Image
14
  # from transformers import AutoFeatureExtractor, AutoModel
15
 
16
  # from utils.src.presentation import Presentation, SlidePage
17
- from utils.src.utils import is_image_path, pjoin
 
18
 
19
  # device_count = torch.cuda.device_count()
20
 
 
14
  # from transformers import AutoFeatureExtractor, AutoModel
15
 
16
  # from utils.src.presentation import Presentation, SlidePage
17
+ # from utils.src.utils import is_image_path, pjoin
18
+ pjoin = os.path.join
19
 
20
  # device_count = torch.cuda.device_count()
21
 
Paper2Poster/utils/wei_utils.py CHANGED
@@ -1,27 +1,27 @@
1
- import re
2
- import io
3
- import contextlib
4
- import traceback
5
- from pptx import Presentation
6
- from pptx.enum.shapes import MSO_SHAPE_TYPE, MSO_SHAPE, MSO_AUTO_SHAPE_TYPE
7
- from pptx.util import Inches, Pt
8
- from pptx.dml.color import RGBColor
9
- from pptx.enum.text import PP_ALIGN, MSO_ANCHOR
10
  from camel.types import ModelPlatformType, ModelType
11
  from camel.configs import ChatGPTConfig, QwenConfig, VLLMConfig, OpenRouterConfig, GeminiConfig
12
  import math
13
- from urllib.parse import quote_from_bytes, quote
14
- from PIL import Image
15
- import os
16
- import copy
17
- import io
18
- from utils.src.utils import ppt_to_images
19
- from playwright.sync_api import sync_playwright
20
- from pathlib import Path
21
- from playwright.async_api import async_playwright
22
- import asyncio
23
- from utils.pptx_utils import *
24
- from utils.critic_utils import *
25
 
26
  def get_agent_config(model_type):
27
  agent_config = {}
@@ -196,795 +196,795 @@ def get_agent_config(model_type):
196
  return agent_config
197
 
198
 
199
- def match_response(response):
200
- response_text = response.msgs[0].content
201
 
202
- # This regular expression looks for text between ```python ... ```
203
- pattern = r'```python(.*?)```'
204
- match = re.search(pattern, response_text, flags=re.DOTALL)
205
 
206
- if not match:
207
- pattern = r'```(.*?)```'
208
- match = re.search(pattern, response_text, flags=re.DOTALL)
209
 
210
- if match:
211
- code_snippet = match.group(1).strip()
212
- else:
213
- # If there's no fenced code block, fallback to entire response or handle error
214
- code_snippet = response_text
215
- return code_snippet
216
 
217
- def run_code_with_utils(code, utils_functions):
218
- return run_code(utils_functions + '\n' + code)
219
 
220
- def run_code(code):
221
- """
222
- Execute Python code and capture stdout as well as the full stack trace on error.
223
- Forces __name__ = "__main__" so that if __name__ == "__main__": blocks will run.
224
 
225
- Returns:
226
- (output, error)
227
- - output: string containing everything that was printed to stdout
228
- - error: string containing the full traceback if an exception occurred; None otherwise
229
- """
230
- stdout_capture = io.StringIO()
231
- # Provide a globals dict specifying that __name__ is "__main__"
232
- exec_globals = {"__name__": "__main__"}
233
-
234
- with contextlib.redirect_stdout(stdout_capture):
235
- try:
236
- exec(code, exec_globals)
237
- error = None
238
- except Exception:
239
- # Capture the entire stack trace
240
- error = traceback.format_exc()
241
-
242
- output = stdout_capture.getvalue()
243
- return output, error
244
-
245
-
246
- def run_code_from_agent(agent, msg, num_retries=1):
247
- agent.reset()
248
- log = []
249
- for attempt in range(num_retries + 1): # +1 to include the initial attempt
250
- response = agent.step(msg)
251
- code = match_response(response)
252
- output, error = run_code(code)
253
- log.append((code, output, error))
254
 
255
- if error is None:
256
- return log
257
 
258
- if attempt < num_retries:
259
- print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
260
- msg = error
261
 
262
- return log
263
-
264
- def run_modular(all_code, file_name, with_border=True, with_label=True):
265
- concatenated_code = utils_functions
266
- concatenated_code += "\n".join(all_code.values())
267
- if with_border and with_label:
268
- concatenated_code += add_border_label_function
269
- concatenated_code += create_id_map_function
270
- concatenated_code += save_helper_info_border_label.format(file_name, file_name, file_name)
271
- elif with_border:
272
- concatenated_code += add_border_function
273
- concatenated_code += save_helper_info_border.format(file_name, file_name)
274
- else:
275
- concatenated_code += f'\nposter.save("{file_name}")'
276
- output, error = run_code(concatenated_code)
277
- return concatenated_code, output, error
278
-
279
- def edit_modular(
280
- agent,
281
- edit_section_name,
282
- feedback,
283
- all_code,
284
- file_name,
285
- outline,
286
- content,
287
- images,
288
- actor_prompt,
289
- num_retries=1,
290
- prompt_type='initial'
291
- ):
292
- agent.reset()
293
- log = []
294
- if prompt_type == 'initial':
295
- msg = actor_prompt.format(
296
- outline['meta'],
297
- {edit_section_name: outline[edit_section_name]},
298
- content,
299
- images,
300
- documentation
301
- )
302
- elif prompt_type == 'edit':
303
- assert (edit_section_name == list(feedback.keys())[0])
304
- msg = actor_prompt.format(
305
- edit_section_name,
306
- all_code[edit_section_name],
307
- feedback,
308
- {edit_section_name: outline[edit_section_name]},
309
- content,
310
- images,
311
- documentation
312
- )
313
- elif prompt_type == 'new':
314
- assert (list(feedback.keys())[0] == 'all_good')
315
- msg = actor_prompt.format(
316
- {edit_section_name: outline[edit_section_name]},
317
- content,
318
- images,
319
- documentation
320
- )
321
-
322
- for attempt in range(num_retries + 1):
323
- response = agent.step(msg)
324
- new_code = match_response(response)
325
- all_code_changed = all_code.copy()
326
- all_code_changed[edit_section_name] = new_code
327
- concatenated_code, output, error = run_modular(all_code_changed, file_name, False, False)
328
- log.append({
329
- "code": new_code,
330
- "output": output,
331
- "error": error,
332
- "concatenated_code": concatenated_code
333
- })
334
- if error is None:
335
- return log
336
 
337
- if attempt < num_retries:
338
- print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
339
- msg = error
340
- msg += '\nFix your code and try again. The poster is a single-page pptx.'
341
- if prompt_type != 'initial':
342
- msg += '\nAssume that you have had a Presentation object named "poster" and a slide named "slide".'
343
-
344
- return log
345
-
346
- def add_border_to_all_elements(prs, border_color=RGBColor(255, 0, 0), border_width=Pt(2)):
347
- """
348
- Iterates over all slides and shapes in the Presentation object 'prs'
349
- and applies a red border with the specified width to each shape.
350
 
351
- Args:
352
- prs: The Presentation object to modify.
353
- border_color: An instance of RGBColor for the border color (default is red).
354
- border_width: The width of the border as a Pt value (default is 2 points).
355
- """
356
- for slide in prs.slides:
357
- for shape in slide.shapes:
358
- # Some shapes (like charts or group shapes) might not support border styling
359
- try:
360
- # Set the line fill to be solid and assign the desired color and width.
361
- shape.line.fill.solid()
362
- shape.line.fill.fore_color.rgb = border_color
363
- shape.line.width = border_width
364
- except Exception as e:
365
- # If a shape doesn't support setting a border, print a message and continue.
366
- print(f"Could not add border to shape {shape.shape_type}: {e}")
367
-
368
-
369
- # 1 point = 12700 EMUs (helper function)
370
- def pt_to_emu(points: float) -> int:
371
- return int(points * 12700)
372
-
373
- def add_border_and_labels(
374
- prs,
375
- border_color=RGBColor(255, 0, 0), # Red border for shapes
376
- border_width=Pt(2), # 2-point border width
377
- label_outline_color=RGBColor(0, 0, 255), # Blue outline for label circle
378
- label_text_color=RGBColor(0, 0, 255), # Blue text color
379
- label_diameter_pt=40 # Diameter of the label circle in points
380
- ):
381
- """
382
- Iterates over all slides and shapes in the Presentation 'prs', applies a
383
- red border to each shape, and places a transparent (no fill), blue-outlined
384
- circular label with a blue number in the center of each shape. Labels start
385
- from 0 and increment for every shape that gets a border.
386
-
387
- Args:
388
- prs: The Presentation object to modify.
389
- border_color: RGBColor for the shape border color (default: red).
390
- border_width: The width of the shape border (Pt).
391
- label_outline_color: The outline color for the label circle (default: blue).
392
- label_text_color: The color of the label text (default: blue).
393
- label_diameter_pt: The diameter of the label circle, in points (default: 40).
394
- """
395
- label_diameter_emu = pt_to_emu(label_diameter_pt) # convert diameter (points) to EMUs
396
- label_counter = 0 # Start labeling at 0
397
- labeled_elements = {}
398
-
399
- for slide in prs.slides:
400
- for shape in slide.shapes:
401
- # Skip shapes that are labels themselves
402
- if shape.name.startswith("Label_"):
403
- continue
404
-
405
- try:
406
- # --- 1) Add red border to the shape (if supported) ---
407
- shape.line.fill.solid()
408
- shape.line.fill.fore_color.rgb = border_color
409
- shape.line.width = border_width
410
-
411
- # --- 2) Calculate center for the label circle ---
412
- label_left = shape.left + (shape.width // 2) - (label_diameter_emu // 2)
413
- label_top = shape.top + (shape.height // 2) - (label_diameter_emu // 2)
414
-
415
- # --- 3) Create label circle (an OVAL) in the center of the shape ---
416
- label_shape = slide.shapes.add_shape(
417
- MSO_AUTO_SHAPE_TYPE.OVAL,
418
- label_left,
419
- label_top,
420
- label_diameter_emu,
421
- label_diameter_emu
422
- )
423
- label_shape.name = f"Label_{label_counter}" # so we can skip it later
424
-
425
- # **Make the circle completely transparent** (no fill at all)
426
- label_shape.fill.background()
427
-
428
- # **Give it a blue outline**
429
- label_shape.line.fill.solid()
430
- label_shape.line.fill.fore_color.rgb = label_outline_color
431
- label_shape.line.width = Pt(3)
432
-
433
- # --- 4) Add the label number (centered, blue text) ---
434
- tf = label_shape.text_frame
435
- tf.text = str(label_counter)
436
- paragraph = tf.paragraphs[0]
437
- paragraph.alignment = PP_ALIGN.CENTER
438
-
439
- run = paragraph.runs[0]
440
- font = run.font
441
- font.size = Pt(40) # Larger font
442
- font.bold = True
443
- font.name = "Arial"
444
- font._element.get_or_change_to_solidFill()
445
- font.fill.fore_color.rgb = label_text_color
446
- # Record properties from the original shape and label text.
447
- labeled_elements[label_counter] = {
448
- 'left': f'{shape.left} EMU',
449
- 'top': f'{shape.top} EMU',
450
- 'width': f'{shape.width} EMU',
451
- 'height': f'{shape.height} EMU',
452
- 'font_size': f'{shape.text_frame.font.size} PT' if hasattr(shape, 'text_frame') else None,
453
- }
454
-
455
- # --- 5) Increment label counter (so every shape has a unique label) ---
456
- label_counter += 1
457
-
458
- except Exception as e:
459
- # If the shape doesn't support borders or text, skip gracefully
460
- print(f"Could not add border/label to shape (type={shape.shape_type}): {e}")
461
-
462
- return labeled_elements
463
-
464
-
465
- def fill_content(agent, prompt, num_retries, existing_code=''):
466
- if existing_code == '':
467
- existing_code = utils_functions
468
- agent.reset()
469
- log = []
470
- cumulative_input_token, cumulative_output_token = 0, 0
471
- for attempt in range(num_retries + 1):
472
- response = agent.step(prompt)
473
- input_token, output_token = account_token(response)
474
- cumulative_input_token += input_token
475
- cumulative_output_token += output_token
476
- new_code = match_response(response)
477
- all_code = existing_code + '\n' + new_code
478
-
479
- output, error = run_code(all_code)
480
- log.append({
481
- "code": new_code,
482
- "output": output,
483
- "error": error,
484
- "concatenated_code": all_code,
485
- 'cumulative_tokens': (cumulative_input_token, cumulative_output_token)
486
- })
487
-
488
- if error is None:
489
- return log
490
 
491
- if attempt < num_retries:
492
- print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
493
- prompt = error
494
- return log
495
-
496
- def apply_theme(agent, prompt, num_retries, existing_code=''):
497
- return fill_content(agent, prompt, num_retries, existing_code)
498
-
499
- def edit_code(agent, prompt, num_retries, existing_code=''):
500
- return fill_content(agent, prompt, num_retries, existing_code)
501
-
502
- def stylize(agent, prompt, num_retries, existing_code=''):
503
- return fill_content(agent, prompt, num_retries, existing_code)
504
-
505
- def gen_layout(agent, prompt, num_retries, name_to_hierarchy, visual_identifier='', existing_code=''):
506
- if existing_code == '':
507
- existing_code = utils_functions
508
- agent.reset()
509
- log = []
510
- cumulative_input_token, cumulative_output_token = 0, 0
511
- for attempt in range(num_retries + 1):
512
- response = agent.step(prompt)
513
- input_token, output_token = account_token(response)
514
- cumulative_input_token += input_token
515
- cumulative_output_token += output_token
516
- new_code = match_response(response)
517
- all_code = existing_code + '\n' + new_code
518
-
519
- # Save visualizations
520
- all_code += f'''
521
- name_to_hierarchy = {name_to_hierarchy}
522
- identifier = "{visual_identifier}"
523
- get_visual_cues(name_to_hierarchy, identifier)
524
- '''
525
-
526
- output, error = run_code(all_code)
527
- log.append({
528
- "code": new_code,
529
- "output": output,
530
- "error": error,
531
- "concatenated_code": all_code,
532
- 'num_tokens': (input_token, output_token),
533
- 'cumulative_tokens': (cumulative_input_token, cumulative_output_token)
534
- })
535
-
536
- if error is None:
537
- return log
538
 
539
- if attempt < num_retries:
540
- print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
541
- prompt = error
542
- return log
543
-
544
- def gen_layout_parallel(agent, prompt, num_retries, existing_code='', slide_width=0, slide_height=0, tmp_name='tmp'):
545
- if existing_code == '':
546
- existing_code = utils_functions
547
 
548
- existing_code += f'''
549
- poster = create_poster(width_inch={slide_width}, height_inch={slide_height})
550
- slide = add_blank_slide(poster)
551
- save_presentation(poster, file_name="poster_{tmp_name}.pptx")
552
- '''
553
- agent.reset()
554
- log = []
555
- cumulative_input_token, cumulative_output_token = 0, 0
556
- for attempt in range(num_retries + 1):
557
- response = agent.step(prompt)
558
- input_token, output_token = account_token(response)
559
- cumulative_input_token += input_token
560
- cumulative_output_token += output_token
561
- new_code = match_response(response)
562
- all_code = existing_code + '\n' + new_code
563
-
564
- output, error = run_code(all_code)
565
- log.append({
566
- "code": new_code,
567
- "output": output,
568
- "error": error,
569
- "concatenated_code": all_code,
570
- 'num_tokens': (input_token, output_token),
571
- 'cumulative_tokens': (cumulative_input_token, cumulative_output_token)
572
- })
573
- if output is None or output == '':
574
- prompt = 'No object name printed.'
575
- continue
576
-
577
- if error is None:
578
- return log
579
 
580
- if attempt < num_retries:
581
- # print(f"Retrying... Attempt {attempt + 1} of {num_retries}", flush=True)
582
- prompt = error
583
- return log
584
-
585
- def compute_bullet_length(textbox_content):
586
- total = 0
587
- for bullet in textbox_content:
588
- for run in bullet['runs']:
589
- total += len(run['text'])
590
- return total
591
-
592
- def check_bounding_boxes(bboxes, overall_width, overall_height):
593
- """
594
- Given a dictionary 'bboxes' whose keys are bounding-box names and whose values are
595
- dictionaries with keys 'left', 'top', 'width', and 'height' (all floats),
596
- along with the overall canvas width and height, this function checks for:
597
-
598
- 1) An overlap between any two bounding boxes (it returns a tuple of their names).
599
- 2) A bounding box that extends beyond the overall width or height (it returns a tuple
600
- containing just that bounding box's name).
601
-
602
- It stops upon finding the first error:
603
- - If an overlap is found first, it returns (name1, name2).
604
- - Otherwise, if an overflow is found, it returns (name,).
605
- - If nothing is wrong, it returns ().
606
-
607
- Parameters:
608
- bboxes (dict): e.g. {
609
- "box1": {"left": 10.0, "top": 10.0, "width": 50.0, "height": 20.0},
610
- "box2": {"left": 55.0, "top": 15.0, "width": 10.0, "height": 10.0},
611
- ...
612
- }
613
- overall_width (float): The total width of the available space.
614
- overall_height (float): The total height of the available space.
615
-
616
- Returns:
617
- tuple: Either (box1, box2) if an overlap is found,
618
- (box,) if a bounding box overflows,
619
- or () if no problem is found.
620
- """
621
-
622
- # Convert bboxes into a list of (name, left, top, width, height) for easier iteration.
623
- box_list = []
624
- for name, coords in bboxes.items():
625
- left = coords["left"]
626
- top = coords["top"]
627
- width = coords["width"]
628
- height = coords["height"]
629
- box_list.append((name, left, top, width, height))
630
-
631
- # Helper function to check overlap between two boxes
632
- def boxes_overlap(box_a, box_b):
633
- # Unpack bounding-box data
634
- name_a, left_a, top_a, width_a, height_a = box_a
635
- name_b, left_b, top_b, width_b, height_b = box_b
636
-
637
- # Compute right and bottom coordinates
638
- right_a = left_a + width_a
639
- bottom_a = top_a + height_a
640
- right_b = left_b + width_b
641
- bottom_b = top_b + height_b
642
-
643
- # Rectangles overlap if not separated along either x or y axis
644
- # If one box is completely to the left or right or above or below the other,
645
- # there's no overlap.
646
- no_overlap = (right_a <= left_b or # A is completely left of B
647
- right_b <= left_a or # B is completely left of A
648
- bottom_a <= top_b or # A is completely above B
649
- bottom_b <= top_a) # B is completely above A
650
- return not no_overlap
651
-
652
- # 1) Check for overlap first
653
- n = len(box_list)
654
- for i in range(n):
655
- for j in range(i + 1, n):
656
- if boxes_overlap(box_list[i], box_list[j]):
657
- return (box_list[i][0], box_list[j][0]) # Return names
658
-
659
- # 2) Check for overflow
660
- for name, left, top, width, height in box_list:
661
- right = left + width
662
- bottom = top + height
663
-
664
- # If boundary is outside [0, overall_width] or [0, overall_height], it's an overflow
665
- if (left < 0 or top < 0 or right > overall_width or bottom > overall_height):
666
- return (name,)
667
-
668
- # 3) If nothing is wrong, return empty tuple
669
- return ()
670
-
671
-
672
- def is_poster_filled(
673
- bounding_boxes: dict,
674
- overall_width: float,
675
- overall_height: float,
676
- max_lr_margin: float,
677
- max_tb_margin: float
678
- ) -> bool:
679
- """
680
- Given a dictionary of bounding boxes (keys are box names and
681
- values are dicts with float keys: "left", "top", "width", "height"),
682
- along with the overall dimensions of the poster and maximum allowed
683
- margins, this function determines whether the boxes collectively
684
- fill the poster within those margin constraints.
685
-
686
- :param bounding_boxes: Dictionary of bounding boxes of the form:
687
- {
688
- "box1": {"left": float, "top": float, "width": float, "height": float},
689
- "box2": {...},
690
- ...
691
- }
692
- :param overall_width: Total width of the poster
693
- :param overall_height: Total height of the poster
694
- :param max_lr_margin: Maximum allowed left and right margins
695
- :param max_tb_margin: Maximum allowed top and bottom margins
696
- :return: True if the bounding boxes fill the poster (with no big leftover spaces),
697
- False otherwise.
698
- """
699
-
700
- # If there are no bounding boxes, we consider the poster unfilled.
701
- if not bounding_boxes:
702
- return False
703
-
704
- # Extract the minimum left, maximum right, minimum top, and maximum bottom from all bounding boxes.
705
- min_left = min(b["left"] for b in bounding_boxes.values())
706
- max_right = max(b["left"] + b["width"] for b in bounding_boxes.values())
707
- min_top = min(b["top"] for b in bounding_boxes.values())
708
- max_bottom = max(b["top"] + b["height"] for b in bounding_boxes.values())
709
-
710
- # Calculate leftover margins.
711
- leftover_left = min_left
712
- leftover_right = overall_width - max_right
713
- leftover_top = min_top
714
- leftover_bottom = overall_height - max_bottom
715
-
716
- # Check if leftover margins exceed the allowed maxima.
717
- if (leftover_left > max_lr_margin or leftover_right > max_lr_margin or
718
- leftover_top > max_tb_margin or leftover_bottom > max_tb_margin):
719
- return False
720
-
721
- return True
722
-
723
def check_and_fix_subsections(section, subsections):
    """
    Validate how ``subsections`` tile the rectangle ``section``.

    The checks run in this order:

    1) Every subsection must lie inside ``section`` and no two subsections
       may overlap.  If any do, the offending names are returned as a
       sorted tuple.
    2) If the summed subsection area is smaller than the section area,
       each subsection is grown once, in dictionary order, towards the
       left, right, top and bottom (in that order) until it reaches the
       section boundary or the nearest neighbouring edge.  The grown boxes
       are returned as a new dictionary; the inputs are not modified.
    3) Otherwise an empty tuple is returned.

    Edge contact is judged on coordinates only (tolerance 1e-9); whether
    the neighbour actually shares a span on the perpendicular axis is not
    taken into account.

    :param section: dict with keys "left", "top", "width", "height".
    :param subsections: dict mapping name -> dict with the same four keys.
    :return: tuple of violating names, dict of expanded boxes, or ``()``.
    """
    eps = 1e-9

    def right(rect):
        return rect["left"] + rect["width"]

    def bottom(rect):
        return rect["top"] + rect["height"]

    def is_overlapping(r1, r2):
        # Two rectangles overlap unless one lies entirely to a side of the other.
        separated = (
            right(r1) <= r2["left"]
            or r1["left"] >= right(r2)
            or bottom(r1) <= r2["top"]
            or r1["top"] >= bottom(r2)
        )
        return not separated

    sec_left, sec_top = section["left"], section["top"]
    sec_right = section["left"] + section["width"]
    sec_bottom = section["top"] + section["height"]

    # --- Step 1a: containment ---
    offenders = set()
    for name, box in subsections.items():
        b_left, b_top = box["left"], box["top"]
        b_right, b_bottom = right(box), bottom(box)
        if (
            b_left < sec_left
            or b_top < sec_top
            or b_right > sec_right
            or b_bottom > sec_bottom
        ):
            offenders.add(name)

    # --- Step 1b: pairwise overlap ---
    names = list(subsections.keys())
    for idx, first in enumerate(names):
        for second in names[idx + 1:]:
            if is_overlapping(subsections[first], subsections[second]):
                offenders.update((first, second))

    if offenders:
        return tuple(sorted(offenders))

    # --- Step 2: coverage by area (valid because nothing overlaps) ---
    area_section = section["width"] * section["height"]
    area_subs = sum(box["width"] * box["height"] for box in subsections.values())
    if not area_subs < area_section:
        return ()

    # Work on copies so the caller's boxes stay untouched.
    grown = {
        name: {key: box[key] for key in ("left", "top", "width", "height")}
        for name, box in subsections.items()
    }

    def neighbours(own_name):
        return [box for name, box in grown.items() if name != own_name]

    def is_touching(edge, section_edge, neighbour_edges):
        # True when `edge` coincides with the section edge or any neighbour edge.
        if abs(edge - section_edge) < eps:
            return True
        return any(abs(other - edge) < eps for other in neighbour_edges)

    for name, box in grown.items():
        rest = neighbours(name)

        # Grow leftwards up to the closest right edge at or left of this box.
        if not is_touching(box["left"], sec_left, (right(o) for o in rest)):
            candidates = [right(o) for o in rest if right(o) <= box["left"]]
            limit = max([sec_left] + candidates)
            gap = box["left"] - limit
            if gap > eps:
                box["width"] += gap
                box["left"] = limit

        # Grow rightwards up to the closest left edge at or right of this box.
        if not is_touching(right(box), sec_right, (o["left"] for o in rest)):
            current_right = box["left"] + box["width"]
            candidates = [o["left"] for o in rest if o["left"] >= current_right]
            limit = min([sec_right] + candidates)
            gap = limit - (box["left"] + box["width"])
            if gap > eps:
                box["width"] += gap

        # Grow upwards up to the closest bottom edge at or above this box.
        if not is_touching(box["top"], sec_top, (bottom(o) for o in rest)):
            candidates = [bottom(o) for o in rest if bottom(o) <= box["top"]]
            limit = max([sec_top] + candidates)
            gap = box["top"] - limit
            if gap > eps:
                box["height"] += gap
                box["top"] = limit

        # Grow downwards up to the closest top edge at or below this box.
        if not is_touching(bottom(box), sec_bottom, (o["top"] for o in rest)):
            current_bottom = box["top"] + box["height"]
            candidates = [o["top"] for o in rest if o["top"] >= current_bottom]
            limit = min([sec_bottom] + candidates)
            gap = limit - (box["top"] + box["height"])
            if gap > eps:
                box["height"] += gap

    return grown
938
-
939
async def rendered_dims(html: Path) -> tuple[int, int]:
    """
    Load a local HTML file in headless Chromium and measure its <body>.

    :param html: path to the HTML file; it is resolved to an absolute path
                 and opened through a percent-encoded ``file://`` URL.
    :return: ``(width, height)`` of the body's bounding rectangle,
             truncated to integers.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()  # no fixed viewport yet
            resolved = html.resolve()
            # quote_from_bytes expects bytes, so the path is encoded as UTF-8.
            url = "file://" + quote_from_bytes(str(resolved).encode("utf-8"), safe="/:")
            await page.goto(url, wait_until="networkidle")

            # Return a plain object with just the two numbers we need, so the
            # result does not depend on how a DOMRect is serialised on its
            # way back from the browser.
            body_box = await page.eval_on_selector(
                "body",
                "el => { const r = el.getBoundingClientRect();"
                " return { width: r.width, height: r.height }; }")
            return int(body_box["width"]), int(body_box["height"])
        finally:
            # Close the browser even when navigation or evaluation fails.
            await browser.close()
957
 
958
 
959
def html_to_png(html_abs_path, poster_width_default, poster_height_default, output_path):
    """
    Render a local HTML file to a full-page PNG screenshot.

    The viewport is sized from the rendered <body> (via ``rendered_dims``);
    if that measurement fails or yields a non-positive size, the supplied
    default dimensions are used instead.

    :param html_abs_path: path to the HTML file (``str`` or ``Path``).
    :param poster_width_default: fallback viewport width in pixels.
    :param poster_height_default: fallback viewport height in pixels.
    :param output_path: where the PNG screenshot is written.
    """
    # rendered_dims calls .resolve() on its argument, so always hand it a Path;
    # a plain string would otherwise fail and silently trigger the fallback.
    html_file = Path(html_abs_path)

    try:
        w, h = asyncio.run(rendered_dims(html_file))
    except Exception:
        # Best-effort measurement: fall back to the caller's defaults, but
        # let KeyboardInterrupt / SystemExit propagate.
        w, h = poster_width_default, poster_height_default

    if w <= 0 or h <= 0:
        w, h = poster_width_default, poster_height_default

    with sync_playwright() as p:
        path_posix = html_file.resolve().as_posix()
        file_url = "file://" + quote(path_posix, safe="/:")
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={"width": w, "height": h})
            page.goto(file_url, wait_until='networkidle')
            page.screenshot(path=output_path, full_page=True)
        finally:
            browser.close()
977
 
978
def account_token(response):
    """
    Read the token usage recorded on a model response.

    :param response: object whose ``info['usage']`` mapping holds
                     ``'prompt_tokens'`` and ``'completion_tokens'``.
    :return: tuple ``(prompt_tokens, completion_tokens)``.
    """
    input_token = response.info['usage']['prompt_tokens']
    output_token = response.info['usage']['completion_tokens']
    return input_token, output_token
983
 
984
def style_bullet_content(bullet_content_item, color, fill_color):
    """
    Set ``color`` and ``fill_color`` on the first run of every bullet, in place.

    :param bullet_content_item: sequence of bullet dicts, each with a
                                non-empty ``'runs'`` list of dicts.
    :param color: value stored under ``'color'``.
    :param fill_color: value stored under ``'fill_color'``.
    """
    for bullet in bullet_content_item:
        first_run = bullet['runs'][0]
        first_run['color'] = color
        first_run['fill_color'] = fill_color
988
 
989
  def scale_to_target_area(width, height, target_width=900, target_height=1200):
990
  """
@@ -1016,334 +1016,334 @@ def scale_to_target_area(width, height, target_width=900, target_height=1200):
1016
  # Optional: Round the dimensions to integers.
1017
  return int(round(new_width)), int(round(new_height))
1018
 
1019
def char_capacity(
    bbox,
    font_size_px=40 * (96 / 72),  # 40pt expressed in px
    *,
    # Average glyph width as a fraction of the font size (≈0.6 for monospace,
    # ≈0.52–0.55 for most proportional sans-serif faces)
    avg_width_ratio: float = 0.54,
    line_height_ratio: float = 1,
    # Optional inner padding in px that the renderer might reserve
    padding_px: int = 0,
) -> int:
    """
    Estimate how many characters fit into a rectangular text box.

    Parameters
    ----------
    bbox : (x, y, height, width)   # all in pixels; x and y are ignored
    font_size_px : float           # font size in px
    avg_width_ratio : float        # average char width ÷ font size
    line_height_ratio : float      # line height ÷ font size
    padding_px : int               # inner padding removed from each side

    Returns
    -------
    int : characters per line × number of lines × a fixed factor of 10,
          or 0 when the padded box has no usable width or height.
    """
    # Fixed multiplier applied to the raw estimate (rationale not visible here).
    CHAR_CONST = 10
    _, _, height_px, width_px = bbox

    inset = 2 * padding_px
    usable_w = max(0, width_px - inset)
    usable_h = max(0, height_px - inset)
    if usable_w == 0 or usable_h == 0:
        return 0  # box is too small

    # At least one character per line and one line are always assumed.
    chars_per_line = max(1, math.floor(usable_w / (font_size_px * avg_width_ratio)))
    lines = max(1, math.floor(usable_h / (font_size_px * line_height_ratio)))
    return chars_per_line * lines * CHAR_CONST
1061
-
1062
def estimate_characters(width_in_inches, height_in_inches, font_size_points, line_spacing_points=None):
    """
    Estimate the number of characters that fit into a bounding box.

    :param width_in_inches: The width of the bounding box, in inches.
    :param height_in_inches: The height of the bounding box, in inches.
    :param font_size_points: The font size, in points.
    :param line_spacing_points: (Optional) The line spacing, in points.
                                Defaults to 1.5 × font_size_points.
    :return: whole characters per line multiplied by whole lines.
    """
    points_per_inch = 72
    spacing = 1.5 * font_size_points if line_spacing_points is None else line_spacing_points

    # An average glyph is approximated as half the font size wide.
    glyph_width = 0.5 * font_size_points

    chars_per_line = int((width_in_inches * points_per_inch) // glyph_width)
    lines_count = int((height_in_inches * points_per_inch) // spacing)
    return chars_per_line * lines_count
1094
-
1095
def equivalent_length_with_forced_breaks(text, width_in_inches, font_size_points):
    """
    Return the "width-equivalent length" of ``text`` when explicit newlines
    are respected.

    Every physical line, including a partially filled or empty one, is
    counted as if it held a full line's worth of characters, so the result
    can exceed ``len(text)``: forced newlines waste the rest of their line.

    :param text: the text to measure; lines are split on ``'\\n'``.
    :param width_in_inches: width of the text box, in inches.
    :param font_size_points: font size, in points.
    :return: number of physical lines × characters per full line.
    """
    # 1 inch = 72 points; an average glyph is taken as half the font size wide.
    width_in_points = width_in_inches * 72
    avg_char_width = 0.5 * font_size_points

    # A box narrower than one glyph would give 0 characters per line and a
    # division by zero below, so treat it as holding one character per line.
    max_chars_per_line = max(1, int(width_in_points // avg_char_width))

    total_equiv_length = 0
    for line in text.split('\n'):
        # An empty line still occupies one physical line.
        sub_lines = max(1, math.ceil(len(line) / max_chars_per_line))
        total_equiv_length += sub_lines * max_chars_per_line

    return total_equiv_length
1130
-
1131
def actual_rendered_length(
    text,
    width_in_inches,
    height_in_inches,
    font_size_points,
    line_spacing_points=None
):
    """
    Estimate how many characters of ``text`` actually fit in the bounding
    box, taking explicit newlines into account.

    :param text: the text to lay out; lines are split on ``'\\n'``.
    :param width_in_inches: width of the box, in inches.
    :param height_in_inches: height of the box, in inches.
    :param font_size_points: font size, in points.
    :param line_spacing_points: line spacing in points; defaults to
                                1.5 × ``font_size_points``.
    :return: count of characters that fit (newline characters are not
             counted); 0 when not even one character or one line fits.
    """
    if line_spacing_points is None:
        line_spacing_points = 1.5 * font_size_points

    # 1 inch = 72 points; an average glyph is taken as half the font size wide.
    width_in_points = width_in_inches * 72
    height_in_points = height_in_inches * 72
    avg_char_width = 0.5 * font_size_points

    max_chars_per_line = int(width_in_points // avg_char_width)
    max_lines = int(height_in_points // line_spacing_points)

    # Nothing can be rendered in a box too narrow for one glyph or too short
    # for one line; returning here also avoids dividing by zero below.
    if max_chars_per_line <= 0 or max_lines <= 0:
        return 0

    used_lines = 0
    displayed_chars = 0

    for line in text.split('\n'):
        if not line:
            # An empty line still consumes one printed line.
            used_lines += 1
            if used_lines >= max_lines:
                break
            continue

        # Number of wrapped sub-lines this logical line needs.
        sub_lines = math.ceil(len(line) / max_chars_per_line)

        if used_lines + sub_lines <= max_lines:
            displayed_chars += len(line)
            used_lines += sub_lines
            continue

        # Only part of this line fits: fill whatever lines remain, then stop.
        lines_left = max_lines - used_lines
        if lines_left > 0:
            displayed_chars += min(lines_left * max_chars_per_line, len(line))
        break

    return displayed_chars
1200
-
1201
-
1202
def remove_hierarchy_and_id(data):
    """
    Return a copy of ``data`` with every 'hierarchy', 'id' and 'location'
    key removed, descending recursively through nested dicts and lists.

    Values that are neither dicts nor lists are returned unchanged.
    """
    if isinstance(data, list):
        return [remove_hierarchy_and_id(item) for item in data]
    if not isinstance(data, dict):
        # Base case: scalars and other objects pass through as-is.
        return data

    dropped_keys = ("hierarchy", "id", 'location')
    return {
        key: remove_hierarchy_and_id(value)
        for key, value in data.items()
        if key not in dropped_keys
    }
1223
 
1224
def outline_estimate_num_chars(outline):
    """
    Annotate an outline, in place, with estimated character budgets.

    The 'meta' entry and any section whose name contains 'title', 'author'
    or 'reference' (case-insensitive) are skipped.  A section without
    'subsections' gets ``num_chars`` from its own 'location'; otherwise
    each subsection gets one, except those whose name contains 'title' or
    that carry a 'path' key.  Estimates use ``estimate_characters`` with a
    font size of 60 and the default line spacing.
    """
    skipped_words = ('title', 'author', 'reference')

    for name, sec in outline.items():
        if name == 'meta':
            continue
        lowered = name.lower()
        if any(word in lowered for word in skipped_words):
            continue

        if 'subsections' not in sec:
            sec['num_chars'] = estimate_characters(
                sec['location']['width'],
                sec['location']['height'],
                60, line_spacing_points=None
            )
            continue

        for sub_name, sub in sec['subsections'].items():
            if 'title' in sub_name.lower() or 'path' in sub:
                continue
            sub['num_chars'] = estimate_characters(
                sub['location']['width'],
                sub['location']['height'],
                60, line_spacing_points=None
            )
1249
-
1250
def generate_length_suggestions(result_json, original_section_outline, raw_section_outline):
    """
    Attach shrink/expand suggestions to generated section text.

    :param result_json: generated section content; a deep copy is annotated
                        and returned, the input is left untouched.
    :param original_section_outline: JSON string of the outline; entries
                                     may carry a ``num_chars`` target.
    :param raw_section_outline: outline dict supplying ``location`` sizes.
    :return: ``(annotated_copy, flag)`` where ``flag`` is True when at
             least one suggestion asks for a change.

    The section-level suggestion is stored under 'suggestions', while each
    subsection's is stored under 'suggestion'.
    """
    NOT_CHANGE = 'Do not change text.'
    original_section_outline = json.loads(original_section_outline)
    new_section_outline = copy.deepcopy(result_json)
    suggestion_flag = False

    def check_length(text, target, width, height):
        # `height` is accepted but not used by the length estimate.
        text_length = equivalent_length_with_forced_breaks(
            text,
            width,
            font_size_points=60,
        )
        # Differences of up to 100 characters either way are tolerated.
        if text_length - target > 100:
            return f'Text too long, shrink by {text_length - target} characters.'
        if target - text_length > 100:
            return f'Text too short, expand by {target - text_length} characters.'
        return NOT_CHANGE

    if 'num_chars' in original_section_outline:
        section_location = raw_section_outline['location']
        new_section_outline['suggestions'] = check_length(
            result_json['description'],
            original_section_outline['num_chars'],
            section_location['width'],
            section_location['height']
        )
        if new_section_outline['suggestions'] != NOT_CHANGE:
            suggestion_flag = True

    if 'subsections' in original_section_outline:
        for sub_name, sub_outline in original_section_outline['subsections'].items():
            if 'num_chars' not in sub_outline:
                continue
            annotated_sub = new_section_outline['subsections'][sub_name]
            annotated_sub['suggestion'] = check_length(
                result_json['subsections'][sub_name]['description'],
                sub_outline['num_chars'],
                raw_section_outline['subsections'][sub_name]['location']['width'],
                raw_section_outline['subsections'][sub_name]['location']['height']
            )
            if annotated_sub['suggestion'] != NOT_CHANGE:
                suggestion_flag = True

    return new_section_outline, suggestion_flag
1290
-
1291
def get_img_ratio(img_path):
    """
    Read the pixel dimensions of an image file.

    :param img_path: path of the image to open.
    :return: dict with the image's ``'width'`` and ``'height'``.
    """
    # Open through a context manager so the underlying file handle is
    # released as soon as the size has been read.
    with Image.open(img_path) as img:
        return {
            'width': img.width,
            'height': img.height
        }
1297
-
1298
def get_img_ratio_in_section(content_json):
    """
    Collect image dimensions for a section and its direct subsections.

    :param content_json: section dict; it and each entry of its optional
                         'subsections' mapping may carry a 'path' key.
    :return: dict mapping each image path found to the result of
             ``get_img_ratio`` for that path.
    """
    ratios = {}

    if 'path' in content_json:
        ratios[content_json['path']] = get_img_ratio(content_json['path'])

    if 'subsections' in content_json:
        for sub in content_json['subsections'].values():
            if 'path' in sub:
                ratios[sub['path']] = get_img_ratio(sub['path'])

    return ratios
1309
-
1310
-
1311
def get_snapshot_from_section(leaf_section, section_name, name_to_hierarchy, leaf_name, section_code, empty_poster_path='poster.pptx'):
    """
    Render one section's code onto the poster and crop a snapshot of a leaf.

    Steps, in order: run ``section_code`` together with ``utils_functions``,
    load the presentation at ``empty_poster_path``, add borders for the
    leaf's hierarchy level, save it as a .pptx under ``tmp/``, convert that
    to images, then crop the first slide image around ``leaf_name``'s box
    and save the crop as a JPEG.

    :return: ``(curr_location, zoomed_in_img, zoomed_in_path)``.
    """
    hierarchy = name_to_hierarchy[leaf_name]
    snapshot_stem = f'tmp/overflow_check_<{section_name}>_<{leaf_section}>_hierarchy_{hierarchy}'
    pptx_path = f"{snapshot_stem}.pptx"
    zoomed_path = f"{snapshot_stem}_zoomed_in.jpg"

    run_code_with_utils(section_code, utils_functions)
    poster = Presentation(empty_poster_path)

    # Borders are added for the leaf's hierarchy level.
    curr_location = add_border_hierarchy(
        poster,
        name_to_hierarchy,
        hierarchy,
        border_width=10,
        # regardless=True
    )
    # NOTE(review): leaf_section is reassigned here but not read afterwards
    # (the crop below is keyed by leaf_name) - confirm which was intended.
    if leaf_section not in curr_location:
        leaf_section = section_name

    save_presentation(poster, file_name=pptx_path)
    ppt_to_images(
        pptx_path,
        snapshot_stem,
        dpi=200
    )
    poster_image = Image.open(os.path.join(f"{snapshot_stem}", "slide_0001.jpg"))

    locations = convert_pptx_bboxes_json_to_image_json(
        curr_location,
        emu_to_inches(poster.slide_width),
        emu_to_inches(poster.slide_height)
    )
    zoomed_in_img = zoom_in_image_by_bbox(
        poster_image,
        locations[leaf_name],
        padding=0.01
    )
    zoomed_in_img.save(zoomed_path)
    return curr_location, zoomed_in_img, zoomed_path
 
1
+ # import re
2
+ # import io
3
+ # import contextlib
4
+ # import traceback
5
+ # from pptx import Presentation
6
+ # from pptx.enum.shapes import MSO_SHAPE_TYPE, MSO_SHAPE, MSO_AUTO_SHAPE_TYPE
7
+ # from pptx.util import Inches, Pt
8
+ # from pptx.dml.color import RGBColor
9
+ # from pptx.enum.text import PP_ALIGN, MSO_ANCHOR
10
  from camel.types import ModelPlatformType, ModelType
11
  from camel.configs import ChatGPTConfig, QwenConfig, VLLMConfig, OpenRouterConfig, GeminiConfig
12
  import math
13
+ # from urllib.parse import quote_from_bytes, quote
14
+ # from PIL import Image
15
+ # import os
16
+ # import copy
17
+ # import io
18
+ # from utils.src.utils import ppt_to_images
19
+ # from playwright.sync_api import sync_playwright
20
+ # from pathlib import Path
21
+ # from playwright.async_api import async_playwright
22
+ # import asyncio
23
+ # from utils.pptx_utils import *
24
+ # from utils.critic_utils import *
25
 
26
  def get_agent_config(model_type):
27
  agent_config = {}
 
196
  return agent_config
197
 
198
 
199
+ # def match_response(response):
200
+ # response_text = response.msgs[0].content
201
 
202
+ # # This regular expression looks for text between ```python ... ```
203
+ # pattern = r'```python(.*?)```'
204
+ # match = re.search(pattern, response_text, flags=re.DOTALL)
205
 
206
+ # if not match:
207
+ # pattern = r'```(.*?)```'
208
+ # match = re.search(pattern, response_text, flags=re.DOTALL)
209
 
210
+ # if match:
211
+ # code_snippet = match.group(1).strip()
212
+ # else:
213
+ # # If there's no fenced code block, fallback to entire response or handle error
214
+ # code_snippet = response_text
215
+ # return code_snippet
216
 
217
+ # def run_code_with_utils(code, utils_functions):
218
+ # return run_code(utils_functions + '\n' + code)
219
 
220
+ # def run_code(code):
221
+ # """
222
+ # Execute Python code and capture stdout as well as the full stack trace on error.
223
+ # Forces __name__ = "__main__" so that if __name__ == "__main__": blocks will run.
224
 
225
+ # Returns:
226
+ # (output, error)
227
+ # - output: string containing everything that was printed to stdout
228
+ # - error: string containing the full traceback if an exception occurred; None otherwise
229
+ # """
230
+ # stdout_capture = io.StringIO()
231
+ # # Provide a globals dict specifying that __name__ is "__main__"
232
+ # exec_globals = {"__name__": "__main__"}
233
+
234
+ # with contextlib.redirect_stdout(stdout_capture):
235
+ # try:
236
+ # exec(code, exec_globals)
237
+ # error = None
238
+ # except Exception:
239
+ # # Capture the entire stack trace
240
+ # error = traceback.format_exc()
241
+
242
+ # output = stdout_capture.getvalue()
243
+ # return output, error
244
+
245
+
246
+ # def run_code_from_agent(agent, msg, num_retries=1):
247
+ # agent.reset()
248
+ # log = []
249
+ # for attempt in range(num_retries + 1): # +1 to include the initial attempt
250
+ # response = agent.step(msg)
251
+ # code = match_response(response)
252
+ # output, error = run_code(code)
253
+ # log.append((code, output, error))
254
 
255
+ # if error is None:
256
+ # return log
257
 
258
+ # if attempt < num_retries:
259
+ # print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
260
+ # msg = error
261
 
262
+ # return log
263
+
264
+ # def run_modular(all_code, file_name, with_border=True, with_label=True):
265
+ # concatenated_code = utils_functions
266
+ # concatenated_code += "\n".join(all_code.values())
267
+ # if with_border and with_label:
268
+ # concatenated_code += add_border_label_function
269
+ # concatenated_code += create_id_map_function
270
+ # concatenated_code += save_helper_info_border_label.format(file_name, file_name, file_name)
271
+ # elif with_border:
272
+ # concatenated_code += add_border_function
273
+ # concatenated_code += save_helper_info_border.format(file_name, file_name)
274
+ # else:
275
+ # concatenated_code += f'\nposter.save("{file_name}")'
276
+ # output, error = run_code(concatenated_code)
277
+ # return concatenated_code, output, error
278
+
279
+ # def edit_modular(
280
+ # agent,
281
+ # edit_section_name,
282
+ # feedback,
283
+ # all_code,
284
+ # file_name,
285
+ # outline,
286
+ # content,
287
+ # images,
288
+ # actor_prompt,
289
+ # num_retries=1,
290
+ # prompt_type='initial'
291
+ # ):
292
+ # agent.reset()
293
+ # log = []
294
+ # if prompt_type == 'initial':
295
+ # msg = actor_prompt.format(
296
+ # outline['meta'],
297
+ # {edit_section_name: outline[edit_section_name]},
298
+ # content,
299
+ # images,
300
+ # documentation
301
+ # )
302
+ # elif prompt_type == 'edit':
303
+ # assert (edit_section_name == list(feedback.keys())[0])
304
+ # msg = actor_prompt.format(
305
+ # edit_section_name,
306
+ # all_code[edit_section_name],
307
+ # feedback,
308
+ # {edit_section_name: outline[edit_section_name]},
309
+ # content,
310
+ # images,
311
+ # documentation
312
+ # )
313
+ # elif prompt_type == 'new':
314
+ # assert (list(feedback.keys())[0] == 'all_good')
315
+ # msg = actor_prompt.format(
316
+ # {edit_section_name: outline[edit_section_name]},
317
+ # content,
318
+ # images,
319
+ # documentation
320
+ # )
321
+
322
+ # for attempt in range(num_retries + 1):
323
+ # response = agent.step(msg)
324
+ # new_code = match_response(response)
325
+ # all_code_changed = all_code.copy()
326
+ # all_code_changed[edit_section_name] = new_code
327
+ # concatenated_code, output, error = run_modular(all_code_changed, file_name, False, False)
328
+ # log.append({
329
+ # "code": new_code,
330
+ # "output": output,
331
+ # "error": error,
332
+ # "concatenated_code": concatenated_code
333
+ # })
334
+ # if error is None:
335
+ # return log
336
 
337
+ # if attempt < num_retries:
338
+ # print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
339
+ # msg = error
340
+ # msg += '\nFix your code and try again. The poster is a single-page pptx.'
341
+ # if prompt_type != 'initial':
342
+ # msg += '\nAssume that you have had a Presentation object named "poster" and a slide named "slide".'
343
+
344
+ # return log
345
+
346
+ # def add_border_to_all_elements(prs, border_color=RGBColor(255, 0, 0), border_width=Pt(2)):
347
+ # """
348
+ # Iterates over all slides and shapes in the Presentation object 'prs'
349
+ # and applies a red border with the specified width to each shape.
350
 
351
+ # Args:
352
+ # prs: The Presentation object to modify.
353
+ # border_color: An instance of RGBColor for the border color (default is red).
354
+ # border_width: The width of the border as a Pt value (default is 2 points).
355
+ # """
356
+ # for slide in prs.slides:
357
+ # for shape in slide.shapes:
358
+ # # Some shapes (like charts or group shapes) might not support border styling
359
+ # try:
360
+ # # Set the line fill to be solid and assign the desired color and width.
361
+ # shape.line.fill.solid()
362
+ # shape.line.fill.fore_color.rgb = border_color
363
+ # shape.line.width = border_width
364
+ # except Exception as e:
365
+ # # If a shape doesn't support setting a border, print a message and continue.
366
+ # print(f"Could not add border to shape {shape.shape_type}: {e}")
367
+
368
+
369
+ # # 1 point = 12700 EMUs (helper function)
370
+ # def pt_to_emu(points: float) -> int:
371
+ # return int(points * 12700)
372
+
373
+ # def add_border_and_labels(
374
+ # prs,
375
+ # border_color=RGBColor(255, 0, 0), # Red border for shapes
376
+ # border_width=Pt(2), # 2-point border width
377
+ # label_outline_color=RGBColor(0, 0, 255), # Blue outline for label circle
378
+ # label_text_color=RGBColor(0, 0, 255), # Blue text color
379
+ # label_diameter_pt=40 # Diameter of the label circle in points
380
+ # ):
381
+ # """
382
+ # Iterates over all slides and shapes in the Presentation 'prs', applies a
383
+ # red border to each shape, and places a transparent (no fill), blue-outlined
384
+ # circular label with a blue number in the center of each shape. Labels start
385
+ # from 0 and increment for every shape that gets a border.
386
+
387
+ # Args:
388
+ # prs: The Presentation object to modify.
389
+ # border_color: RGBColor for the shape border color (default: red).
390
+ # border_width: The width of the shape border (Pt).
391
+ # label_outline_color: The outline color for the label circle (default: blue).
392
+ # label_text_color: The color of the label text (default: blue).
393
+ # label_diameter_pt: The diameter of the label circle, in points (default: 40).
394
+ # """
395
+ # label_diameter_emu = pt_to_emu(label_diameter_pt) # convert diameter (points) to EMUs
396
+ # label_counter = 0 # Start labeling at 0
397
+ # labeled_elements = {}
398
+
399
+ # for slide in prs.slides:
400
+ # for shape in slide.shapes:
401
+ # # Skip shapes that are labels themselves
402
+ # if shape.name.startswith("Label_"):
403
+ # continue
404
+
405
+ # try:
406
+ # # --- 1) Add red border to the shape (if supported) ---
407
+ # shape.line.fill.solid()
408
+ # shape.line.fill.fore_color.rgb = border_color
409
+ # shape.line.width = border_width
410
+
411
+ # # --- 2) Calculate center for the label circle ---
412
+ # label_left = shape.left + (shape.width // 2) - (label_diameter_emu // 2)
413
+ # label_top = shape.top + (shape.height // 2) - (label_diameter_emu // 2)
414
+
415
+ # # --- 3) Create label circle (an OVAL) in the center of the shape ---
416
+ # label_shape = slide.shapes.add_shape(
417
+ # MSO_AUTO_SHAPE_TYPE.OVAL,
418
+ # label_left,
419
+ # label_top,
420
+ # label_diameter_emu,
421
+ # label_diameter_emu
422
+ # )
423
+ # label_shape.name = f"Label_{label_counter}" # so we can skip it later
424
+
425
+ # # **Make the circle completely transparent** (no fill at all)
426
+ # label_shape.fill.background()
427
+
428
+ # # **Give it a blue outline**
429
+ # label_shape.line.fill.solid()
430
+ # label_shape.line.fill.fore_color.rgb = label_outline_color
431
+ # label_shape.line.width = Pt(3)
432
+
433
+ # # --- 4) Add the label number (centered, blue text) ---
434
+ # tf = label_shape.text_frame
435
+ # tf.text = str(label_counter)
436
+ # paragraph = tf.paragraphs[0]
437
+ # paragraph.alignment = PP_ALIGN.CENTER
438
+
439
+ # run = paragraph.runs[0]
440
+ # font = run.font
441
+ # font.size = Pt(40) # Larger font
442
+ # font.bold = True
443
+ # font.name = "Arial"
444
+ # font._element.get_or_change_to_solidFill()
445
+ # font.fill.fore_color.rgb = label_text_color
446
+ # # Record properties from the original shape and label text.
447
+ # labeled_elements[label_counter] = {
448
+ # 'left': f'{shape.left} EMU',
449
+ # 'top': f'{shape.top} EMU',
450
+ # 'width': f'{shape.width} EMU',
451
+ # 'height': f'{shape.height} EMU',
452
+ # 'font_size': f'{shape.text_frame.font.size} PT' if hasattr(shape, 'text_frame') else None,
453
+ # }
454
+
455
+ # # --- 5) Increment label counter (so every shape has a unique label) ---
456
+ # label_counter += 1
457
+
458
+ # except Exception as e:
459
+ # # If the shape doesn't support borders or text, skip gracefully
460
+ # print(f"Could not add border/label to shape (type={shape.shape_type}): {e}")
461
+
462
+ # return labeled_elements
463
+
464
+
465
+ # def fill_content(agent, prompt, num_retries, existing_code=''):
466
+ # if existing_code == '':
467
+ # existing_code = utils_functions
468
+ # agent.reset()
469
+ # log = []
470
+ # cumulative_input_token, cumulative_output_token = 0, 0
471
+ # for attempt in range(num_retries + 1):
472
+ # response = agent.step(prompt)
473
+ # input_token, output_token = account_token(response)
474
+ # cumulative_input_token += input_token
475
+ # cumulative_output_token += output_token
476
+ # new_code = match_response(response)
477
+ # all_code = existing_code + '\n' + new_code
478
+
479
+ # output, error = run_code(all_code)
480
+ # log.append({
481
+ # "code": new_code,
482
+ # "output": output,
483
+ # "error": error,
484
+ # "concatenated_code": all_code,
485
+ # 'cumulative_tokens': (cumulative_input_token, cumulative_output_token)
486
+ # })
487
+
488
+ # if error is None:
489
+ # return log
490
 
491
+ # if attempt < num_retries:
492
+ # print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
493
+ # prompt = error
494
+ # return log
495
+
496
+ # def apply_theme(agent, prompt, num_retries, existing_code=''):
497
+ # return fill_content(agent, prompt, num_retries, existing_code)
498
+
499
+ # def edit_code(agent, prompt, num_retries, existing_code=''):
500
+ # return fill_content(agent, prompt, num_retries, existing_code)
501
+
502
+ # def stylize(agent, prompt, num_retries, existing_code=''):
503
+ # return fill_content(agent, prompt, num_retries, existing_code)
504
+
505
+ # def gen_layout(agent, prompt, num_retries, name_to_hierarchy, visual_identifier='', existing_code=''):
506
+ # if existing_code == '':
507
+ # existing_code = utils_functions
508
+ # agent.reset()
509
+ # log = []
510
+ # cumulative_input_token, cumulative_output_token = 0, 0
511
+ # for attempt in range(num_retries + 1):
512
+ # response = agent.step(prompt)
513
+ # input_token, output_token = account_token(response)
514
+ # cumulative_input_token += input_token
515
+ # cumulative_output_token += output_token
516
+ # new_code = match_response(response)
517
+ # all_code = existing_code + '\n' + new_code
518
+
519
+ # # Save visualizations
520
+ # all_code += f'''
521
+ # name_to_hierarchy = {name_to_hierarchy}
522
+ # identifier = "{visual_identifier}"
523
+ # get_visual_cues(name_to_hierarchy, identifier)
524
+ # '''
525
+
526
+ # output, error = run_code(all_code)
527
+ # log.append({
528
+ # "code": new_code,
529
+ # "output": output,
530
+ # "error": error,
531
+ # "concatenated_code": all_code,
532
+ # 'num_tokens': (input_token, output_token),
533
+ # 'cumulative_tokens': (cumulative_input_token, cumulative_output_token)
534
+ # })
535
+
536
+ # if error is None:
537
+ # return log
538
 
539
+ # if attempt < num_retries:
540
+ # print(f"Retrying... Attempt {attempt + 1} of {num_retries}")
541
+ # prompt = error
542
+ # return log
543
+
544
+ # def gen_layout_parallel(agent, prompt, num_retries, existing_code='', slide_width=0, slide_height=0, tmp_name='tmp'):
545
+ # if existing_code == '':
546
+ # existing_code = utils_functions
547
 
548
+ # existing_code += f'''
549
+ # poster = create_poster(width_inch={slide_width}, height_inch={slide_height})
550
+ # slide = add_blank_slide(poster)
551
+ # save_presentation(poster, file_name="poster_{tmp_name}.pptx")
552
+ # '''
553
+ # agent.reset()
554
+ # log = []
555
+ # cumulative_input_token, cumulative_output_token = 0, 0
556
+ # for attempt in range(num_retries + 1):
557
+ # response = agent.step(prompt)
558
+ # input_token, output_token = account_token(response)
559
+ # cumulative_input_token += input_token
560
+ # cumulative_output_token += output_token
561
+ # new_code = match_response(response)
562
+ # all_code = existing_code + '\n' + new_code
563
+
564
+ # output, error = run_code(all_code)
565
+ # log.append({
566
+ # "code": new_code,
567
+ # "output": output,
568
+ # "error": error,
569
+ # "concatenated_code": all_code,
570
+ # 'num_tokens': (input_token, output_token),
571
+ # 'cumulative_tokens': (cumulative_input_token, cumulative_output_token)
572
+ # })
573
+ # if output is None or output == '':
574
+ # prompt = 'No object name printed.'
575
+ # continue
576
+
577
+ # if error is None:
578
+ # return log
579
 
580
+ # if attempt < num_retries:
581
+ # # print(f"Retrying... Attempt {attempt + 1} of {num_retries}", flush=True)
582
+ # prompt = error
583
+ # return log
584
+
585
+ # def compute_bullet_length(textbox_content):
586
+ # total = 0
587
+ # for bullet in textbox_content:
588
+ # for run in bullet['runs']:
589
+ # total += len(run['text'])
590
+ # return total
591
+
592
+ # def check_bounding_boxes(bboxes, overall_width, overall_height):
593
+ # """
594
+ # Given a dictionary 'bboxes' whose keys are bounding-box names and whose values are
595
+ # dictionaries with keys 'left', 'top', 'width', and 'height' (all floats),
596
+ # along with the overall canvas width and height, this function checks for:
597
+
598
+ # 1) An overlap between any two bounding boxes (it returns a tuple of their names).
599
+ # 2) A bounding box that extends beyond the overall width or height (it returns a tuple
600
+ # containing just that bounding box's name).
601
+
602
+ # It stops upon finding the first error:
603
+ # - If an overlap is found first, it returns (name1, name2).
604
+ # - Otherwise, if an overflow is found, it returns (name,).
605
+ # - If nothing is wrong, it returns ().
606
+
607
+ # Parameters:
608
+ # bboxes (dict): e.g. {
609
+ # "box1": {"left": 10.0, "top": 10.0, "width": 50.0, "height": 20.0},
610
+ # "box2": {"left": 55.0, "top": 15.0, "width": 10.0, "height": 10.0},
611
+ # ...
612
+ # }
613
+ # overall_width (float): The total width of the available space.
614
+ # overall_height (float): The total height of the available space.
615
+
616
+ # Returns:
617
+ # tuple: Either (box1, box2) if an overlap is found,
618
+ # (box,) if a bounding box overflows,
619
+ # or () if no problem is found.
620
+ # """
621
+
622
+ # # Convert bboxes into a list of (name, left, top, width, height) for easier iteration.
623
+ # box_list = []
624
+ # for name, coords in bboxes.items():
625
+ # left = coords["left"]
626
+ # top = coords["top"]
627
+ # width = coords["width"]
628
+ # height = coords["height"]
629
+ # box_list.append((name, left, top, width, height))
630
+
631
+ # # Helper function to check overlap between two boxes
632
+ # def boxes_overlap(box_a, box_b):
633
+ # # Unpack bounding-box data
634
+ # name_a, left_a, top_a, width_a, height_a = box_a
635
+ # name_b, left_b, top_b, width_b, height_b = box_b
636
+
637
+ # # Compute right and bottom coordinates
638
+ # right_a = left_a + width_a
639
+ # bottom_a = top_a + height_a
640
+ # right_b = left_b + width_b
641
+ # bottom_b = top_b + height_b
642
+
643
+ # # Rectangles overlap if not separated along either x or y axis
644
+ # # If one box is completely to the left or right or above or below the other,
645
+ # # there's no overlap.
646
+ # no_overlap = (right_a <= left_b or # A is completely left of B
647
+ # right_b <= left_a or # B is completely left of A
648
+ # bottom_a <= top_b or # A is completely above B
649
+ # bottom_b <= top_a) # B is completely above A
650
+ # return not no_overlap
651
+
652
+ # # 1) Check for overlap first
653
+ # n = len(box_list)
654
+ # for i in range(n):
655
+ # for j in range(i + 1, n):
656
+ # if boxes_overlap(box_list[i], box_list[j]):
657
+ # return (box_list[i][0], box_list[j][0]) # Return names
658
+
659
+ # # 2) Check for overflow
660
+ # for name, left, top, width, height in box_list:
661
+ # right = left + width
662
+ # bottom = top + height
663
+
664
+ # # If boundary is outside [0, overall_width] or [0, overall_height], it's an overflow
665
+ # if (left < 0 or top < 0 or right > overall_width or bottom > overall_height):
666
+ # return (name,)
667
+
668
+ # # 3) If nothing is wrong, return empty tuple
669
+ # return ()
670
+
671
+
672
+ # def is_poster_filled(
673
+ # bounding_boxes: dict,
674
+ # overall_width: float,
675
+ # overall_height: float,
676
+ # max_lr_margin: float,
677
+ # max_tb_margin: float
678
+ # ) -> bool:
679
+ # """
680
+ # Given a dictionary of bounding boxes (keys are box names and
681
+ # values are dicts with float keys: "left", "top", "width", "height"),
682
+ # along with the overall dimensions of the poster and maximum allowed
683
+ # margins, this function determines whether the boxes collectively
684
+ # fill the poster within those margin constraints.
685
+
686
+ # :param bounding_boxes: Dictionary of bounding boxes of the form:
687
+ # {
688
+ # "box1": {"left": float, "top": float, "width": float, "height": float},
689
+ # "box2": {...},
690
+ # ...
691
+ # }
692
+ # :param overall_width: Total width of the poster
693
+ # :param overall_height: Total height of the poster
694
+ # :param max_lr_margin: Maximum allowed left and right margins
695
+ # :param max_tb_margin: Maximum allowed top and bottom margins
696
+ # :return: True if the bounding boxes fill the poster (with no big leftover spaces),
697
+ # False otherwise.
698
+ # """
699
+
700
+ # # If there are no bounding boxes, we consider the poster unfilled.
701
+ # if not bounding_boxes:
702
+ # return False
703
+
704
+ # # Extract the minimum left, maximum right, minimum top, and maximum bottom from all bounding boxes.
705
+ # min_left = min(b["left"] for b in bounding_boxes.values())
706
+ # max_right = max(b["left"] + b["width"] for b in bounding_boxes.values())
707
+ # min_top = min(b["top"] for b in bounding_boxes.values())
708
+ # max_bottom = max(b["top"] + b["height"] for b in bounding_boxes.values())
709
+
710
+ # # Calculate leftover margins.
711
+ # leftover_left = min_left
712
+ # leftover_right = overall_width - max_right
713
+ # leftover_top = min_top
714
+ # leftover_bottom = overall_height - max_bottom
715
+
716
+ # # Check if leftover margins exceed the allowed maxima.
717
+ # if (leftover_left > max_lr_margin or leftover_right > max_lr_margin or
718
+ # leftover_top > max_tb_margin or leftover_bottom > max_tb_margin):
719
+ # return False
720
+
721
+ # return True
722
+
723
+ # def check_and_fix_subsections(section, subsections):
724
+ # """
725
+ # Given a 'section' bounding box and a dictionary of 'subsections',
726
+ # checks:
727
+
728
+ # 1) That each subsection is within the main section and that
729
+ # no two subsections overlap.
730
+ # - If there is a problem, returns a tuple of the names of
731
+ # the offending subsections.
732
+
733
+ # 2) That the subsections fully occupy the area of 'section'.
734
+ # - If not, greedily expand each subsection (in the order
735
+ # left->right->top->bottom), and return a dictionary of
736
+ # the updated bounding boxes for the subsections.
737
+
738
+ # 3) Otherwise, returns an empty tuple if nothing is wrong.
739
+
740
+ # :param section: dict with keys "left", "top", "width", "height".
741
+ # :param subsections: dict mapping name -> dict with "left", "top", "width", "height".
742
+ # :return: Either
743
+ # - tuple of subsection names that are out of bounds or overlapping,
744
+ # - dict of expanded bounding boxes if they do not fully occupy 'section',
745
+ # - or an empty tuple if everything is correct.
746
+ # """
747
+
748
+ # # --- Utility functions ---
749
+ # def right(rect):
750
+ # return rect["left"] + rect["width"]
751
+
752
+ # def bottom(rect):
753
+ # return rect["top"] + rect["height"]
754
+
755
+ # def is_overlapping(r1, r2):
756
+ # """
757
+ # Returns True if rectangles r1 and r2 overlap (strictly),
758
+ # False otherwise.
759
+ # """
760
+ # return not (
761
+ # right(r1) <= r2["left"]
762
+ # or r1["left"] >= right(r2)
763
+ # or bottom(r1) <= r2["top"]
764
+ # or r1["top"] >= bottom(r2)
765
+ # )
766
+
767
+ # # 1) Check each subsection is within the main section
768
+ # names_violating = set()
769
+ # sec_left, sec_top = section["left"], section["top"]
770
+ # sec_right = section["left"] + section["width"]
771
+ # sec_bottom = section["top"] + section["height"]
772
+
773
+ # for name, sub in subsections.items():
774
+ # # Check boundary
775
+ # sub_left, sub_top = sub["left"], sub["top"]
776
+ # sub_right, sub_bottom = right(sub), bottom(sub)
777
+ # if (
778
+ # sub_left < sec_left
779
+ # or sub_top < sec_top
780
+ # or sub_right > sec_right
781
+ # or sub_bottom > sec_bottom
782
+ # ):
783
+ # # Out of bounds
784
+ # names_violating.add(name)
785
+
786
+ # # 2) Check pairwise overlaps
787
+ # sub_keys = list(subsections.keys())
788
+ # for i in range(len(sub_keys)):
789
+ # for j in range(i + 1, len(sub_keys)):
790
+ # n1, n2 = sub_keys[i], sub_keys[j]
791
+ # if is_overlapping(subsections[n1], subsections[n2]):
792
+ # # Mark both as violating
793
+ # names_violating.add(n1)
794
+ # names_violating.add(n2)
795
+
796
+ # # If anything violated boundaries or overlapped, return them as a tuple
797
+ # if names_violating:
798
+ # return tuple(sorted(names_violating))
799
+
800
+ # # 3) Check if subsections fully occupy the section by area.
801
+ # # (Since we've checked there's no overlap, area-based check is safe for "full coverage".)
802
+ # area_section = section["width"] * section["height"]
803
+ # area_subs = sum(
804
+ # sub["width"] * sub["height"] for sub in subsections.values()
805
+ # )
806
+
807
+ # if area_subs < area_section:
808
+ # # -- We need to expand subsections greedily. --
809
+
810
+ # # Make a copy of the bounding boxes so as not to modify originals.
811
+ # expanded_subs = {
812
+ # name: {
813
+ # "left": sub["left"],
814
+ # "top": sub["top"],
815
+ # "width": sub["width"],
816
+ # "height": sub["height"],
817
+ # }
818
+ # for name, sub in subsections.items()
819
+ # }
820
+
821
+ # # Helper to see whether we are touching a boundary or another subsection
822
+ # def touching_left(sname, sbox):
823
+ # if abs(sbox["left"] - sec_left) < 1e-9:
824
+ # # touches main section left boundary
825
+ # return True
826
+ # # touches the right edge of another subsection
827
+ # for oname, obox in expanded_subs.items():
828
+ # if oname == sname:
829
+ # continue
830
+ # if abs(right(obox) - sbox["left"]) < 1e-9:
831
+ # return True
832
+ # return False
833
+
834
+ # def touching_right(sname, sbox):
835
+ # r = right(sbox)
836
+ # if abs(r - sec_right) < 1e-9:
837
+ # return True
838
+ # for oname, obox in expanded_subs.items():
839
+ # if oname == sname:
840
+ # continue
841
+ # if abs(obox["left"] - r) < 1e-9:
842
+ # return True
843
+ # return False
844
+
845
+ # def touching_top(sname, sbox):
846
+ # if abs(sbox["top"] - sec_top) < 1e-9:
847
+ # return True
848
+ # for oname, obox in expanded_subs.items():
849
+ # if oname == sname:
850
+ # continue
851
+ # if abs(bottom(obox) - sbox["top"]) < 1e-9:
852
+ # return True
853
+ # return False
854
+
855
+ # def touching_bottom(sname, sbox):
856
+ # b = bottom(sbox)
857
+ # if abs(b - sec_bottom) < 1e-9:
858
+ # return True
859
+ # for oname, obox in expanded_subs.items():
860
+ # if oname == sname:
861
+ # continue
862
+ # if abs(obox["top"] - b) < 1e-9:
863
+ # return True
864
+ # return False
865
+
866
+ # # Attempt a single pass of expansions, left->right->top->bottom
867
+ # for name in expanded_subs:
868
+ # sub = expanded_subs[name]
869
+
870
+ # # Expand left if not touching left boundary or another box
871
+ # if not touching_left(name, sub):
872
+ # # The "left boundary" is the maximum "right" of any subsection strictly to the left,
873
+ # # or the section's left boundary, whichever is larger.
874
+ # left_bound = sec_left
875
+ # for oname, obox in expanded_subs.items():
876
+ # if oname == name:
877
+ # continue
878
+ # r_ = obox["left"] + obox["width"]
879
+ # # only consider those that are strictly left of this sub
880
+ # if r_ <= sub["left"] and r_ > left_bound:
881
+ # left_bound = r_
882
+ # # Now expand
883
+ # delta = sub["left"] - left_bound
884
+ # if delta > 1e-9: # If there's any real gap
885
+ # sub["width"] += delta
886
+ # sub["left"] = left_bound
887
+
888
+ # # Expand right if not touching right boundary or another box
889
+ # if not touching_right(name, sub):
890
+ # right_bound = sec_right
891
+ # sub_right = sub["left"] + sub["width"]
892
+ # for oname, obox in expanded_subs.items():
893
+ # if oname == name:
894
+ # continue
895
+ # left_ = obox["left"]
896
+ # # only consider those that are strictly to the right
897
+ # if left_ >= sub_right and left_ < right_bound:
898
+ # right_bound = left_
899
+ # delta = right_bound - (sub["left"] + sub["width"])
900
+ # if delta > 1e-9:
901
+ # sub["width"] += delta
902
+
903
+ # # Expand top if not touching top boundary or another box
904
+ # if not touching_top(name, sub):
905
+ # top_bound = sec_top
906
+ # for oname, obox in expanded_subs.items():
907
+ # if oname == name:
908
+ # continue
909
+ # b_ = obox["top"] + obox["height"]
910
+ # if b_ <= sub["top"] and b_ > top_bound:
911
+ # top_bound = b_
912
+ # delta = sub["top"] - top_bound
913
+ # if delta > 1e-9:
914
+ # sub["height"] += delta
915
+ # sub["top"] = top_bound
916
+
917
+ # # Expand bottom if not touching bottom boundary or another box
918
+ # if not touching_bottom(name, sub):
919
+ # bottom_bound = sec_bottom
920
+ # sub_bottom = sub["top"] + sub["height"]
921
+ # for oname, obox in expanded_subs.items():
922
+ # if oname == name:
923
+ # continue
924
+ # other_top = obox["top"]
925
+ # if other_top >= sub_bottom and other_top < bottom_bound:
926
+ # bottom_bound = other_top
927
+ # delta = bottom_bound - (sub["top"] + sub["height"])
928
+ # if delta > 1e-9:
929
+ # sub["height"] += delta
930
+
931
+ # # After expansion, return the expanded dictionary
932
+ # # per the spec: "If the second case happens, return a dictionary ...
933
+ # # containing the modified bounding box dictionaries."
934
+ # return expanded_subs
935
+
936
+ # # If we get here, then area_subs == area_section and there's no overlap => all good
937
+ # return ()
938
+
939
+ # async def rendered_dims(html: Path) -> tuple[int, int]:
940
+ # async with async_playwright() as p:
941
+ # browser = await p.chromium.launch()
942
+ # page = await browser.new_page() # no fixed viewport yet
943
+ # resolved = html.resolve()
944
+ # # quote_from_bytes expects bytes, so we encode the path as UTF‐8:
945
+ # url = "file://" + quote_from_bytes(str(resolved).encode("utf-8"), safe="/:")
946
+ # await page.goto(url, wait_until="networkidle")
947
+
948
+ # # 1) bounding-box of <body>
949
+ # body_box = await page.eval_on_selector(
950
+ # "body",
951
+ # "el => el.getBoundingClientRect()")
952
+ # w = int(body_box["width"])
953
+ # h = int(body_box["height"])
954
+
955
+ # await browser.close()
956
+ # return w, h
957
 
958
 
959
+ # def html_to_png(html_abs_path, poster_width_default, poster_height_default, output_path):
960
+ # html_file = html_abs_path
961
 
962
+ # try:
963
+ # w, h = asyncio.run(rendered_dims(html_file))
964
+ # except:
965
+ # w = poster_width_default
966
+ # h = poster_height_default
967
 
968
+ # with sync_playwright() as p:
969
+ # path_posix = Path(html_file).resolve().as_posix()
970
 
971
+ # file_url = "file://" + quote(path_posix, safe="/:")
972
+ # browser = p.chromium.launch()
973
+ # page = browser.new_page(viewport={"width": w, "height": h})
974
+ # page.goto(file_url, wait_until='networkidle')
975
+ # page.screenshot(path=output_path, full_page=True)
976
+ # browser.close()
977
 
978
+ # def account_token(response):
979
+ # input_token = response.info['usage']['prompt_tokens']
980
+ # output_token = response.info['usage']['completion_tokens']
981
 
982
+ # return input_token, output_token
983
 
984
+ # def style_bullet_content(bullet_content_item, color, fill_color):
985
+ # for i in range(len(bullet_content_item)):
986
+ # bullet_content_item[i]['runs'][0]['color'] = color
987
+ # bullet_content_item[i]['runs'][0]['fill_color'] = fill_color
988
 
989
  def scale_to_target_area(width, height, target_width=900, target_height=1200):
990
  """
 
1016
  # Optional: Round the dimensions to integers.
1017
  return int(round(new_width)), int(round(new_height))
1018
 
1019
+ # def char_capacity(
1020
+ # bbox,
1021
+ # font_size_px=40 * (96 / 72), # Default font size in px (40pt converted to px)
1022
+ # *,
1023
+ # # Average glyph width as fraction of font-size (≈0.6 for monospace,
1024
+ # # ≈0.52–0.55 for most proportional sans-serif faces)
1025
+ # avg_width_ratio: float = 0.54,
1026
+ # line_height_ratio: float = 1,
1027
+ # # Optional inner padding in px that the renderer might reserve
1028
+ # padding_px: int = 0,
1029
+ # ) -> int:
1030
+ # """
1031
+ # Estimate the number of characters that will fit into a rectangular text box.
1032
+
1033
+ # Parameters
1034
+ # ----------
1035
+ # bbox : (x, y, height, width) # all in pixels
1036
+ # font_size_px : int # font size in px
1037
+ # avg_width_ratio : float # average char width ÷ fontSize
1038
+ # line_height_ratio : float # line height ÷ fontSize
1039
+ # padding_px : int # optional inner padding on each side
1040
+
1041
+ # Returns
1042
+ # -------
1043
+ # int : estimated character capacity
1044
+ # """
1045
+ # CHAR_CONST = 10
1046
+ # _, _, height_px, width_px = bbox
1047
+
1048
+ # usable_w = max(0, width_px - 2 * padding_px)
1049
+ # usable_h = max(0, height_px - 2 * padding_px)
1050
+
1051
+ # if usable_w == 0 or usable_h == 0:
1052
+ # return 0 # box is too small
1053
+
1054
+ # avg_char_w = font_size_px * avg_width_ratio
1055
+ # line_height = font_size_px * line_height_ratio
1056
+
1057
+ # chars_per_line = max(1, math.floor(usable_w / avg_char_w))
1058
+ # lines = max(1, math.floor(usable_h / line_height))
1059
+
1060
+ # return chars_per_line * lines * CHAR_CONST
1061
+
1062
+ # def estimate_characters(width_in_inches, height_in_inches, font_size_points, line_spacing_points=None):
1063
+ # """
1064
+ # Estimate the number of characters that can fit into a bounding box.
1065
+
1066
+ # :param width_in_inches: The width of the bounding box, in inches.
1067
+ # :param height_in_inches: The height of the bounding box, in inches.
1068
+ # :param font_size_points: The font size, in points.
1069
+ # :param line_spacing_points: (Optional) The line spacing, in points.
1070
+ # Defaults to 1.5 × font_size_points if not provided.
1071
+ # :return: Estimated number of characters that fit in the bounding box.
1072
+ # """
1073
+ # if line_spacing_points is None:
1074
+ # # Default line spacing is 1.5 times the font size
1075
+ # line_spacing_points = 1.5 * font_size_points
1076
+
1077
+ # # 1 inch = 72 points
1078
+ # width_in_points = width_in_inches * 72
1079
+ # height_in_points = height_in_inches * 72
1080
+
1081
+ # # Rough approximation of the average width of a character: half of the font size
1082
+ # avg_char_width = 0.5 * font_size_points
1083
+
1084
+ # # Number of characters that can fit per line
1085
+ # chars_per_line = int(width_in_points // avg_char_width)
1086
+
1087
+ # # Number of lines that can fit in the bounding box
1088
+ # lines_count = int(height_in_points // line_spacing_points)
1089
+
1090
+ # # Total number of characters
1091
+ # total_characters = chars_per_line * lines_count
1092
+
1093
+ # return total_characters
1094
+
1095
+ # def equivalent_length_with_forced_breaks(text, width_in_inches, font_size_points):
1096
+ # """
1097
+ # Returns the "width-equivalent length" of the text when forced newlines
1098
+ # are respected. Each physical line (including partial) is counted as if it
1099
+ # had 'max_chars_per_line' characters.
1100
 
1101
+ # This number can exceed len(text), because forced newlines waste leftover
1102
+ # space on the line.
1103
+ # """
1104
+ # # 1 inch = 72 points
1105
+ # width_in_points = width_in_inches * 72
1106
+ # avg_char_width = 0.5 * font_size_points
1107
+
1108
+ # # How many characters fit in one fully occupied line?
1109
+ # max_chars_per_line = int(width_in_points // avg_char_width)
1110
+
1111
+ # # Split on explicit newlines
1112
+ # logical_lines = text.split('\n')
1113
+
1114
+ # total_equiv_length = 0
1115
+
1116
+ # for line in logical_lines:
1117
+ # # If the line is empty, we still "use" one line (which is max_chars_per_line slots).
1118
+ # if not line:
1119
+ # total_equiv_length += max_chars_per_line
1120
+ # continue
1121
+
1122
+ # line_length = len(line)
1123
+ # # How many sub-lines (wraps) does it need?
1124
+ # sub_lines = math.ceil(line_length / max_chars_per_line)
1125
+
1126
+ # # Each sub-line is effectively counted as if it were fully used
1127
+ # total_equiv_length += sub_lines * max_chars_per_line
1128
+
1129
+ # return total_equiv_length
1130
+
1131
+ # def actual_rendered_length(
1132
+ # text,
1133
+ # width_in_inches,
1134
+ # height_in_inches,
1135
+ # font_size_points,
1136
+ # line_spacing_points=None
1137
+ # ):
1138
+ # """
1139
+ # Estimate how many characters from `text` will actually fit in the bounding
1140
+ # box, taking into account explicit newlines.
1141
+ # """
1142
+ # if line_spacing_points is None:
1143
+ # line_spacing_points = 1.5 * font_size_points
1144
+
1145
+ # # 1 inch = 72 points
1146
+ # width_in_points = width_in_inches * 72
1147
+ # height_in_points = height_in_inches * 72
1148
+
1149
+ # # Estimate average character width
1150
+ # avg_char_width = 0.5 * font_size_points
1151
+
1152
+ # # Maximum chars per line (approx)
1153
+ # max_chars_per_line = int(width_in_points // avg_char_width)
1154
+
1155
+ # # Maximum number of lines that can fit
1156
+ # max_lines = int(height_in_points // line_spacing_points)
1157
+
1158
+ # # Split on newline chars to get individual "logical" lines
1159
+ # logical_lines = text.split('\n')
1160
+
1161
+ # used_lines = 0
1162
+ # displayed_chars = 0
1163
+
1164
+ # for line in logical_lines:
1165
+ # # If the line is empty, it still takes one printed line
1166
+ # if not line:
1167
+ # used_lines += 1
1168
+ # # Stop if we exceed available lines
1169
+ # if used_lines >= max_lines:
1170
+ # break
1171
+ # continue
1172
+
1173
+ # # Number of sub-lines the text will occupy if it wraps
1174
+ # sub_lines = math.ceil(len(line) / max_chars_per_line)
1175
+
1176
+ # # If we don't exceed the bounding box's vertical capacity
1177
+ # if used_lines + sub_lines <= max_lines:
1178
+ # # All chars fit within the bounding box
1179
+ # displayed_chars += len(line)
1180
+ # used_lines += sub_lines
1181
+ # else:
1182
+ # # Only part of this line will fit
1183
+ # lines_left = max_lines - used_lines
1184
+ # if lines_left <= 0:
1185
+ # # No space left at all
1186
+ # break
1187
+
1188
+ # # We can render only `lines_left` sub-lines of this line
1189
+ # # That means we can render up to:
1190
+ # chars_that_fit = lines_left * max_chars_per_line
1191
+
1192
+ # # Clip to the actual number of characters
1193
+ # chars_that_fit = min(chars_that_fit, len(line))
1194
+
1195
+ # displayed_chars += chars_that_fit
1196
+ # used_lines += lines_left # We've used up all remaining lines
1197
+ # break # No more space in the bounding box
1198
+
1199
+ # return displayed_chars
1200
+
1201
+
1202
+ # def remove_hierarchy_and_id(data):
1203
+ # """
1204
+ # Recursively remove the 'hierarchy' and 'id' fields from a nested
1205
+ # dictionary representing sections and subsections.
1206
+ # """
1207
+ # if isinstance(data, dict):
1208
+ # # Create a new dict to store filtered data
1209
+ # new_data = {}
1210
+ # for key, value in data.items():
1211
+ # # Skip the keys "hierarchy" and "id"
1212
+ # if key in ("hierarchy", "id", 'location'):
1213
+ # continue
1214
+ # # Recursively process the value
1215
+ # new_data[key] = remove_hierarchy_and_id(value)
1216
+ # return new_data
1217
+ # elif isinstance(data, list):
1218
+ # # If it's a list, process each item recursively
1219
+ # return [remove_hierarchy_and_id(item) for item in data]
1220
+ # else:
1221
+ # # Base case: if it's neither dict nor list, just return the value as is
1222
+ # return data
1223
 
1224
+ # def outline_estimate_num_chars(outline):
1225
+ # for k, v in outline.items():
1226
+ # if k == 'meta':
1227
+ # continue
1228
+ # if 'title' in k.lower() or 'author' in k.lower() or 'reference' in k.lower():
1229
+ # continue
1230
+ # if not 'subsections' in v:
1231
+ # num_chars = estimate_characters(
1232
+ # v['location']['width'],
1233
+ # v['location']['height'],
1234
+ # 60, line_spacing_points=None
1235
+ # )
1236
+ # v['num_chars'] = num_chars
1237
+ # else:
1238
+ # for k_sub, v_sub in v['subsections'].items():
1239
+ # if 'title' in k_sub.lower():
1240
+ # continue
1241
+ # if 'path' in v_sub:
1242
+ # continue
1243
+ # num_chars = estimate_characters(
1244
+ # v_sub['location']['width'],
1245
+ # v_sub['location']['height'],
1246
+ # 60, line_spacing_points=None
1247
+ # )
1248
+ # v_sub['num_chars'] = num_chars
1249
+
1250
+ # def generate_length_suggestions(result_json, original_section_outline, raw_section_outline):
1251
+ # NOT_CHANGE = 'Do not change text.'
1252
+ # original_section_outline = json.loads(original_section_outline)
1253
+ # suggestion_flag = False
1254
+ # new_section_outline = copy.deepcopy(result_json)
1255
+ # def check_length(text, target, width, height):
1256
+ # text_length = equivalent_length_with_forced_breaks(
1257
+ # text,
1258
+ # width,
1259
+ # font_size_points=60,
1260
+ # )
1261
+ # if text_length - target > 100:
1262
+ # return f'Text too long, shrink by {text_length - target} characters.'
1263
+ # elif target - text_length > 100:
1264
+ # return f'Text too short, expand by {target - text_length} characters.'
1265
+ # else:
1266
+ # return NOT_CHANGE
1267
+
1268
+ # if 'num_chars' in original_section_outline:
1269
+ # new_section_outline['suggestions'] = check_length(
1270
+ # result_json['description'],
1271
+ # original_section_outline['num_chars'],
1272
+ # raw_section_outline['location']['width'],
1273
+ # raw_section_outline['location']['height']
1274
+ # )
1275
+ # if new_section_outline['suggestions'] != NOT_CHANGE:
1276
+ # suggestion_flag = True
1277
+ # if 'subsections' in original_section_outline:
1278
+ # for k, v in original_section_outline['subsections'].items():
1279
+ # if 'num_chars' in v:
1280
+ # new_section_outline['subsections'][k]['suggestion'] = check_length(
1281
+ # result_json['subsections'][k]['description'],
1282
+ # v['num_chars'],
1283
+ # raw_section_outline['subsections'][k]['location']['width'],
1284
+ # raw_section_outline['subsections'][k]['location']['height']
1285
+ # )
1286
+ # if new_section_outline['subsections'][k]['suggestion'] != NOT_CHANGE:
1287
+ # suggestion_flag = True
1288
+
1289
+ # return new_section_outline, suggestion_flag
1290
+
1291
+ # def get_img_ratio(img_path):
1292
+ # img = Image.open(img_path)
1293
+ # return {
1294
+ # 'width': img.width,
1295
+ # 'height': img.height
1296
+ # }
1297
+
1298
+ # def get_img_ratio_in_section(content_json):
1299
+ # res = {}
1300
+ # if 'path' in content_json:
1301
+ # res[content_json['path']] = get_img_ratio(content_json['path'])
1302
+
1303
+ # if 'subsections' in content_json:
1304
+ # for subsection_name, val in content_json['subsections'].items():
1305
+ # if 'path' in val:
1306
+ # res[val['path']] = get_img_ratio(val['path'])
1307
+
1308
+ # return res
1309
+
1310
+
1311
+ # def get_snapshot_from_section(leaf_section, section_name, name_to_hierarchy, leaf_name, section_code, empty_poster_path='poster.pptx'):
1312
+ # hierarchy = name_to_hierarchy[leaf_name]
1313
+ # hierarchy_overflow_name = f'tmp/overflow_check_<{section_name}>_<{leaf_section}>_hierarchy_{hierarchy}'
1314
+ # run_code_with_utils(section_code, utils_functions)
1315
+ # poster = Presentation(empty_poster_path)
1316
+ # # add border regardless of the hierarchy
1317
+ # curr_location = add_border_hierarchy(
1318
+ # poster,
1319
+ # name_to_hierarchy,
1320
+ # hierarchy,
1321
+ # border_width=10,
1322
+ # # regardless=True
1323
+ # )
1324
+ # if not leaf_section in curr_location:
1325
+ # leaf_section = section_name
1326
+ # save_presentation(poster, file_name=f"{hierarchy_overflow_name}.pptx")
1327
+ # ppt_to_images(
1328
+ # f"{hierarchy_overflow_name}.pptx",
1329
+ # hierarchy_overflow_name,
1330
+ # dpi=200
1331
+ # )
1332
+ # poster_image_path = os.path.join(f"{hierarchy_overflow_name}", "slide_0001.jpg")
1333
+ # poster_image = Image.open(poster_image_path)
1334
+
1335
+ # poster_width = emu_to_inches(poster.slide_width)
1336
+ # poster_height = emu_to_inches(poster.slide_height)
1337
+ # locations = convert_pptx_bboxes_json_to_image_json(
1338
+ # curr_location,
1339
+ # poster_width,
1340
+ # poster_height
1341
+ # )
1342
+ # zoomed_in_img = zoom_in_image_by_bbox(
1343
+ # poster_image,
1344
+ # locations[leaf_name],
1345
+ # padding=0.01
1346
+ # )
1347
+ # # save the zoomed_in_img
1348
+ # zoomed_in_img.save(f"{hierarchy_overflow_name}_zoomed_in.jpg")
1349
+ # return curr_location, zoomed_in_img, f"{hierarchy_overflow_name}_zoomed_in.jpg"
app.py CHANGED
@@ -151,8 +151,8 @@ iface = gr.Interface(
151
  ],
152
  title="📄 Paper2Poster",
153
  description=(
154
- "Upload your paper, and the pipeline will automatically generate a fully compilable LaTeX poster; you can download the ZIP file and compile it yourself. Each paper takes approximately 6–10 minutes to process."
155
- "Provide either an arXiv link or upload a PDF file (choose one); the system will generate a poster and package it for download."
156
  "You must upload at least one institutional logo (multiple allowed).\n"
157
  ),
158
  allow_flagging="never",
 
151
  ],
152
  title="📄 Paper2Poster",
153
  description=(
154
+ "Upload your paper, and the pipeline will automatically generate a fully compilable LaTeX poster; you can download the ZIP file and compile it yourself. Each paper takes approximately 6–10 minutes to process.\n"
155
+ "Provide either an arXiv link or upload a PDF file (choose one); the system will generate a poster and package it for download.\n"
156
  "You must upload at least one institutional logo (multiple allowed).\n"
157
  ),
158
  allow_flagging="never",