youngtsai committed on
Commit
ee689dc
1 Parent(s): b872b89

summary_json = {"summary": str(summary)}

Browse files
Files changed (1) hide show
  1. app.py +43 -31
app.py CHANGED
@@ -527,7 +527,7 @@ def process_youtube_link(link):
527
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
528
  first_image = formatted_transcript[0]['screenshot_path']
529
  first_text = formatted_transcript[0]['text']
530
- mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
531
  mind_map = mind_map_json["mind_map"]
532
  mind_map_html = get_mind_map_html(mind_map)
533
 
@@ -596,27 +596,47 @@ def process_web_link(link):
596
  soup = BeautifulSoup(response.content, 'html.parser')
597
  return soup.get_text()
598
 
599
- def get_mind_map(video_id, df_string):
600
- # 先抓 g drive 看看有沒有 {video_id}_mind_map.json
601
- print("===get_mind_map===")
602
- service = init_drive_service()
603
- parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
604
- folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
605
- file_name = f'{video_id}_mind_map.json'
 
 
 
 
 
 
 
 
 
 
 
 
 
606
 
607
- # 检查檔案是否存在
608
- exists, file_id = check_file_exists(service, folder_id, file_name)
609
- if not exists:
610
- mind_map = generate_mind_map(df_string)
611
- mind_map_json = {"mind_map": str(mind_map)}
612
- mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
613
- upload_content_directly(service, file_name, folder_id, mind_map_text)
614
- print("mind_map已上傳到Google Drive")
615
- else:
616
- # mindmap已存在,下载内容
617
- print("mind_map已存在于Google Drive中")
618
- mind_map_text = download_file_as_string(service, file_id)
619
- mind_map_json = json.loads(mind_map_text)
 
 
 
 
 
 
 
620
 
621
  return mind_map_json
622
 
@@ -712,7 +732,8 @@ def get_video_id_summary(video_id, df_string, source):
712
  # 检查 summary_file 是否存在
713
  is_summary_file_exists = gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name)
714
  if not is_summary_file_exists:
715
- summary_json = processed_video_summary_to_json(df_string)
 
716
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
717
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
718
  print("summary已上传到GCS")
@@ -733,7 +754,6 @@ def get_video_id_summary(video_id, df_string, source):
733
  exists, file_id = check_file_exists(service, folder_id, file_name)
734
  if not exists:
735
  summary = generate_summarise(df_string)
736
- # processed_summary = processed_video_summary_to_json(summary)
737
  summary_json = {"summary": str(summary)}
738
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
739
 
@@ -746,14 +766,6 @@ def get_video_id_summary(video_id, df_string, source):
746
  print("===get_video_id_summary error===")
747
  print(error_msg)
748
  print("===get_video_id_summary error===")
749
-
750
-
751
-
752
- # 存在 local at OUTPUT_PATH as {video_id}_summary.json
753
- # with open(f'{OUTPUT_PATH}/{video_id}_summary.json', 'w') as f:
754
- # f.write(summary_text)
755
- # print(f"summary已存在 local at {OUTPUT_PATH}/{video_id}_summary.json")
756
- # file_id = upload_file_directly(service, file_name, folder_id, f'{OUTPUT_PATH}/{video_id}_summary.json')
757
  else:
758
  # 逐字稿已存在,下载逐字稿内容
759
  print("summary已存在Google Drive中")
 
527
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
528
  first_image = formatted_transcript[0]['screenshot_path']
529
  first_text = formatted_transcript[0]['text']
530
+ mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
531
  mind_map = mind_map_json["mind_map"]
532
  mind_map_html = get_mind_map_html(mind_map)
533
 
 
596
  soup = BeautifulSoup(response.content, 'html.parser')
597
  return soup.get_text()
598
 
599
def get_mind_map(video_id, df_string, source):
    """Return the mind-map JSON for a video, generating and caching it if absent.

    The mind map is cached as ``{video_id}_mind_map.json`` in the storage
    backend selected by ``source``. If the file is missing, a mind map is
    generated from ``df_string``, uploaded, and returned; otherwise the
    cached file is downloaded and parsed.

    Args:
        video_id: Identifier of the video; used to build the cache file name
            (and the GCS blob prefix / Drive folder name).
        df_string: Transcript content passed to ``generate_mind_map``.
        source: Storage backend, either ``"gcs"`` or ``"drive"``.

    Returns:
        A dict. A freshly generated result has the shape
        ``{"mind_map": <str>}``; a cached result is whatever JSON was stored.

    Raises:
        ValueError: If ``source`` is not ``"gcs"`` or ``"drive"``. Previously
            this case fell through both branches and failed with an
            ``UnboundLocalError`` at the ``return`` statement.
    """
    # Fail fast with a clear message instead of reaching the return statement
    # with ``mind_map_json`` never assigned.
    if source not in ("gcs", "drive"):
        raise ValueError(
            f"Unsupported source for get_mind_map: {source!r} "
            "(expected 'gcs' or 'drive')"
        )

    file_name = f'{video_id}_mind_map.json'

    if source == "gcs":
        print("===get_mind_map on gcs===")
        gcs_client = init_gcs_client(GCS_KEY)
        bucket_name = 'video_ai_assistant'
        blob_name = f"{video_id}/{file_name}"
        # Check whether the cached file already exists.
        is_file_exists = gcs_check_file_exists(gcs_client, bucket_name, blob_name)
        if not is_file_exists:
            mind_map = generate_mind_map(df_string)
            mind_map_json = {"mind_map": str(mind_map)}
            mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
            print("mind_map已上傳到GCS")
        else:
            # The mind map already exists; download its content.
            print("mind_map已存在于GCS中")
            mind_map_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
            mind_map_json = json.loads(mind_map_text)
    else:  # source == "drive"
        print("===get_mind_map on drive===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)

        # Check whether the cached file already exists.
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            mind_map = generate_mind_map(df_string)
            mind_map_json = {"mind_map": str(mind_map)}
            mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
            upload_content_directly(service, file_name, folder_id, mind_map_text)
            print("mind_map已上傳到Google Drive")
        else:
            # The mind map already exists; download its content.
            print("mind_map已存在于Google Drive中")
            mind_map_text = download_file_as_string(service, file_id)
            mind_map_json = json.loads(mind_map_text)

    return mind_map_json
642
 
 
732
  # 检查 summary_file 是否存在
733
  is_summary_file_exists = gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name)
734
  if not is_summary_file_exists:
735
+ summary = generate_summarise(df_string)
736
+ summary_json = {"summary": str(summary)}
737
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
738
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
739
  print("summary已上传到GCS")
 
754
  exists, file_id = check_file_exists(service, folder_id, file_name)
755
  if not exists:
756
  summary = generate_summarise(df_string)
 
757
  summary_json = {"summary": str(summary)}
758
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
759
 
 
766
  print("===get_video_id_summary error===")
767
  print(error_msg)
768
  print("===get_video_id_summary error===")
 
 
 
 
 
 
 
 
769
  else:
770
  # 逐字稿已存在,下载逐字稿内容
771
  print("summary已存在Google Drive中")