youngtsai committed on
Commit
b872b89
1 Parent(s): f28acdd

def get_video_id_summary(video_id, df_string, source):

Browse files
Files changed (1) hide show
  1. app.py +53 -35
app.py CHANGED
@@ -521,7 +521,7 @@ def process_youtube_link(link):
521
  source = "gcs"
522
  questions = get_questions(video_id, formatted_simple_transcript, source)
523
  formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
524
- summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
525
  summary = summary_json["summary"]
526
  html_content = format_transcript_to_html(formatted_transcript)
527
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
@@ -702,46 +702,64 @@ def processed_video_summary_to_json(summary):
702
  return summary_json
703
 
704
  # get video_id_summary.json content
705
- def get_video_id_summary(video_id, df_string):
706
- print("===get_video_id_summary===")
707
- service = init_drive_service()
708
- parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
709
- folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
710
- file_name = f'{video_id}_summary.json'
711
-
712
- # 检查逐字稿是否存在
713
- exists, file_id = check_file_exists(service, folder_id, file_name)
714
- if not exists:
715
- summary = generate_summarise(df_string)
716
- # processed_summary = processed_video_summary_to_json(summary)
717
- summary_json = {"summary": str(summary)}
718
- summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
719
-
720
- try:
721
- upload_content_directly(service, file_name, folder_id, summary_text)
722
- print("summary已上傳到Google Drive")
 
723
 
724
- except Exception as e:
725
- error_msg = f" {video_id} 摘要錯誤: {str(e)}"
726
- print("===get_video_id_summary error===")
727
- print(error_msg)
728
- print("===get_video_id_summary error===")
 
729
 
 
 
 
 
 
 
 
 
 
 
 
730
 
 
 
 
 
 
731
 
732
- # 存在 local at OUTPUT_PATH as {video_id}_summary.json
733
- # with open(f'{OUTPUT_PATH}/{video_id}_summary.json', 'w') as f:
734
- # f.write(summary_text)
735
- # print(f"summary已存在 local at {OUTPUT_PATH}/{video_id}_summary.json")
736
- # file_id = upload_file_directly(service, file_name, folder_id, f'{OUTPUT_PATH}/{video_id}_summary.json')
737
 
738
 
739
- else:
740
- # 逐字稿已存在,下载逐字稿内容
741
- print("summary已存在Google Drive中")
742
- summary_text = download_file_as_string(service, file_id)
743
- summary_json = json.loads(summary_text)
744
-
 
 
 
 
 
745
  return summary_json
746
 
747
 
 
521
  source = "gcs"
522
  questions = get_questions(video_id, formatted_simple_transcript, source)
523
  formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
524
+ summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
525
  summary = summary_json["summary"]
526
  html_content = format_transcript_to_html(formatted_transcript)
527
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
 
702
  return summary_json
703
 
704
  # get video_id_summary.json content
705
+ def get_video_id_summary(video_id, df_string, source):
706
+ if source == "gcs":
707
+ print("===get_video_id_summary on gcs===")
708
+ gcs_client = init_gcs_client(GCS_KEY)
709
+ bucket_name = 'video_ai_assistant'
710
+ file_name = f'{video_id}_summary.json'
711
+ summary_file_blob_name = f"{video_id}/{file_name}"
712
+ # 检查 summary_file 是否存在
713
+ is_summary_file_exists = gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name)
714
+ if not is_summary_file_exists:
715
+ summary_json = processed_video_summary_to_json(df_string)
716
+ summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
717
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
718
+ print("summary已上传到GCS")
719
+ else:
720
+ # summary已存在,下载内容
721
+ print("summary已存在于GCS中")
722
+ summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
723
+ summary_json = json.loads(summary_text)
724
 
725
+ elif source == "drive":
726
+ print("===get_video_id_summary===")
727
+ service = init_drive_service()
728
+ parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
729
+ folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
730
+ file_name = f'{video_id}_summary.json'
731
 
732
+ # 检查逐字稿是否存在
733
+ exists, file_id = check_file_exists(service, folder_id, file_name)
734
+ if not exists:
735
+ summary = generate_summarise(df_string)
736
+ # processed_summary = processed_video_summary_to_json(summary)
737
+ summary_json = {"summary": str(summary)}
738
+ summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
739
+
740
+ try:
741
+ upload_content_directly(service, file_name, folder_id, summary_text)
742
+ print("summary已上傳到Google Drive")
743
 
744
+ except Exception as e:
745
+ error_msg = f" {video_id} 摘要錯誤: {str(e)}"
746
+ print("===get_video_id_summary error===")
747
+ print(error_msg)
748
+ print("===get_video_id_summary error===")
749
 
 
 
 
 
 
750
 
751
 
752
+ # 存在 local at OUTPUT_PATH as {video_id}_summary.json
753
+ # with open(f'{OUTPUT_PATH}/{video_id}_summary.json', 'w') as f:
754
+ # f.write(summary_text)
755
+ # print(f"summary已存在 local at {OUTPUT_PATH}/{video_id}_summary.json")
756
+ # file_id = upload_file_directly(service, file_name, folder_id, f'{OUTPUT_PATH}/{video_id}_summary.json')
757
+ else:
758
+ # 逐字稿已存在,下载逐字稿内容
759
+ print("summary已存在Google Drive中")
760
+ summary_text = download_file_as_string(service, file_id)
761
+ summary_json = json.loads(summary_text)
762
+
763
  return summary_json
764
 
765