youngtsai committed on
Commit
1643087
1 Parent(s): 2f6be19

get_questions

Browse files
Files changed (1) hide show
  1. app.py +43 -21
app.py CHANGED
@@ -518,7 +518,8 @@ def process_youtube_link(link):
518
  TRANSCRIPTS = formatted_transcript
519
 
520
  # 基于逐字稿生成其他所需的输出
521
- questions = get_questions(video_id, formatted_simple_transcript)
 
522
  formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
523
  summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
524
  summary = summary_json["summary"]
@@ -824,26 +825,47 @@ def generate_questions(df_string):
824
 
825
  return questions
826
 
827
- def get_questions(video_id, df_string):
828
- # g drive 確認是有有 video_id_questions.json
829
- print("===get_questions===")
830
- service = init_drive_service()
831
- parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
832
- folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
833
- file_name = f'{video_id}_questions.json'
834
-
835
- # 检查檔案是否存在
836
- exists, file_id = check_file_exists(service, folder_id, file_name)
837
- if not exists:
838
- questions = generate_questions(df_string)
839
- questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
840
- upload_content_directly(service, file_name, folder_id, questions_text)
841
- print("questions已上傳到Google Drive")
842
- else:
843
- # 逐字稿已存在,下载逐字稿内容
844
- print("questions已存在于Google Drive中")
845
- questions_text = download_file_as_string(service, file_id)
846
- questions = json.loads(questions_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
847
 
848
  q1 = questions[0] if len(questions) > 0 else ""
849
  q2 = questions[1] if len(questions) > 1 else ""
 
518
  TRANSCRIPTS = formatted_transcript
519
 
520
  # 基于逐字稿生成其他所需的输出
521
+ source = "gcs"
522
+ questions = get_questions(video_id, formatted_simple_transcript, source)
523
  formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
524
  summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
525
  summary = summary_json["summary"]
 
825
 
826
  return questions
827
 
828
+ def get_questions(video_id, df_string, source="gcs"):
829
+ if source == "gcs":
830
+ # 去 gcs 確認是否有 video_id_questions.json
831
+ print("===get_questions on gcs===")
832
+ gcs_client = init_gcs_client(GCS_KEY)
833
+ bucket_name = 'video_ai_assistant'
834
+ file_name = f'{video_id}_questions.json'
835
+ blob_name = f"{video_id}/{file_name}"
836
+ # 检查檔案是否存在
837
+ is_questions_exists = gcs_check_file_exists(gcs_client, bucket_name, blob_name)
838
+ if not is_questions_exists:
839
+ questions = generate_questions(df_string)
840
+ questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
841
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
842
+ print("questions已上傳到GCS")
843
+ else:
844
+ # questions 已存在,下载 questions 内容
845
+ print("questions已存在于GCS中")
846
+ questions_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
847
+ questions = json.loads(questions_text)
848
+
849
+ else if source == "drive":
850
+ # 去 g drive 確認是否有 video_id_questions.json
851
+ print("===get_questions===")
852
+ service = init_drive_service()
853
+ parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
854
+ folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
855
+ file_name = f'{video_id}_questions.json'
856
+
857
+ # 检查檔案是否存在
858
+ exists, file_id = check_file_exists(service, folder_id, file_name)
859
+ if not exists:
860
+ questions = generate_questions(df_string)
861
+ questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
862
+ upload_content_directly(service, file_name, folder_id, questions_text)
863
+ print("questions已上傳到Google Drive")
864
+ else:
865
+ # questions 已存在,下载 questions 内容
866
+ print("questions已存在于Google Drive中")
867
+ questions_text = download_file_as_string(service, file_id)
868
+ questions = json.loads(questions_text)
869
 
870
  q1 = questions[0] if len(questions) > 0 else ""
871
  q2 = questions[1] if len(questions) > 1 else ""