youngtsai commited on
Commit
7935a4b
1 Parent(s): c683e79
Files changed (2) hide show
  1. app.py +383 -179
  2. chatbot.py +27 -44
app.py CHANGED
@@ -72,7 +72,6 @@ else:
72
 
73
  TRANSCRIPTS = []
74
  CURRENT_INDEX = 0
75
- VIDEO_ID = ""
76
 
77
  OPEN_AI_CLIENT = OpenAI(api_key=OPEN_AI_KEY)
78
  GROQ_CLIENT = Groq(api_key=GROQ_API_KEY)
@@ -373,6 +372,9 @@ def get_transcript(video_id):
373
  for language in languages:
374
  try:
375
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
 
 
 
376
  return transcript # 成功獲取字幕,直接返回結果
377
  except NoTranscriptFound:
378
  continue # 當前語言的字幕沒有找到,繼續嘗試下一個語言
@@ -413,73 +415,33 @@ def generate_transcription(video_id):
413
  chunk_path = f"{OUTPUT_PATH}/{video_id}_part_{i}.{codec_name}"
414
  chunk.export(chunk_path, format=codec_name)
415
 
416
- with open(chunk_path, "rb") as chunk_file:
417
- response = OPEN_AI_CLIENT.audio.transcriptions.create(
418
- model="whisper-1",
419
- file=chunk_file,
420
- response_format="verbose_json",
421
- timestamp_granularities=["segment"],
422
- prompt="Transcribe the following audio file. if chinese, please using 'language: zh-TW' ",
423
- )
424
-
425
- # Adjusting the timestamps for the chunk based on its position in the full audio
426
- adjusted_segments = [{
427
- 'text': segment['text'],
428
- 'start': math.ceil(segment['start'] + start_time / 1000.0), # Converting milliseconds to seconds
429
- 'end': math.ceil(segment['end'] + start_time / 1000.0),
430
- 'duration': math.ceil(segment['end'] - segment['start'])
431
- } for segment in response.segments]
432
-
433
- transcription.extend(adjusted_segments)
 
 
 
434
 
435
  # Remove temporary chunk files after processing
436
  os.remove(chunk_path)
437
 
438
  return transcription
439
 
440
- def process_transcript_and_screenshots(video_id):
441
- print("====process_transcript_and_screenshots====")
442
-
443
- # Drive
444
- service = init_drive_service()
445
- parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
446
- folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
447
-
448
- # 逐字稿文件名
449
- file_name = f'{video_id}_transcript.json'
450
- # 检查逐字稿是否存在
451
- exists, file_id = check_file_exists(service, folder_id, file_name)
452
- if not exists:
453
- # 从YouTube获取逐字稿并上传
454
- transcript = get_transcript(video_id)
455
- if transcript:
456
- print("成功獲取字幕")
457
- else:
458
- print("沒有找到字幕")
459
- transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
460
- file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
461
- print("逐字稿已上传到Google Drive")
462
- else:
463
- # 逐字稿已存在,下载逐字稿内容
464
- print("逐字稿已存在于Google Drive中")
465
- transcript_text = download_file_as_string(service, file_id)
466
- transcript = json.loads(transcript_text)
467
-
468
- # 处理逐字稿中的每个条目,检查并上传截图
469
- for entry in transcript:
470
- if 'img_file_id' not in entry:
471
- screenshot_path = screenshot_youtube_video(video_id, entry['start'])
472
- img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
473
- set_public_permission(service, img_file_id)
474
- entry['img_file_id'] = img_file_id
475
- print(f"截图已上传到Google Drive: {img_file_id}")
476
-
477
- # 更新逐字稿文件
478
- updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
479
- update_file_on_drive(service, file_id, updated_transcript_text)
480
- print("逐字稿已更新,包括截图链接")
481
- return transcript
482
-
483
  def process_transcript_and_screenshots_on_gcs(video_id):
484
  print("====process_transcript_and_screenshots_on_gcs====")
485
  # GCS
@@ -492,6 +454,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
492
  is_new_transcript = False
493
  is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
494
  if not is_transcript_exists:
 
495
  # 从YouTube获取逐字稿并上传
496
  try:
497
  transcript = get_transcript(video_id)
@@ -567,11 +530,8 @@ def process_youtube_link(password, link):
567
  # 使用 YouTube API 获取逐字稿
568
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
569
  video_id = extract_youtube_id(link)
570
- global VIDEO_ID
571
- VIDEO_ID = video_id
572
 
573
  try:
574
- # transcript = process_transcript_and_screenshots(video_id)
575
  transcript = process_transcript_and_screenshots_on_gcs(video_id)
576
  except Exception as e:
577
  error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
@@ -579,17 +539,14 @@ def process_youtube_link(password, link):
579
  print(error_msg)
580
  raise gr.Error(error_msg)
581
 
 
582
  formatted_transcript = []
583
  formatted_simple_transcript =[]
584
- screenshot_paths = []
585
  for entry in transcript:
586
  start_time = format_seconds_to_time(entry['start'])
587
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
588
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
589
  img_file_id = entry['img_file_id']
590
- # img_file_id =""
591
- # 先取消 Google Drive 的图片
592
- # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
593
  screenshot_path = img_file_id
594
  line = {
595
  "start_time": start_time,
@@ -606,7 +563,6 @@ def process_youtube_link(password, link):
606
  "text": entry['text']
607
  }
608
  formatted_simple_transcript.append(simple_line)
609
- screenshot_paths.append(screenshot_path)
610
 
611
  global TRANSCRIPTS
612
  TRANSCRIPTS = formatted_transcript
@@ -614,11 +570,12 @@ def process_youtube_link(password, link):
614
  # 基于逐字稿生成其他所需的输出
615
  source = "gcs"
616
  questions = get_questions(video_id, formatted_simple_transcript, source)
617
- formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
618
  summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
619
  summary = summary_json["summary"]
620
  key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
621
  key_moments = key_moments_json["key_moments"]
 
622
  key_moments_html = get_key_moments_html(key_moments)
623
  html_content = format_transcript_to_html(formatted_transcript)
624
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
@@ -636,11 +593,13 @@ def process_youtube_link(password, link):
636
 
637
  # 确保返回与 UI 组件预期匹配的输出
638
  return video_id, \
 
639
  questions[0] if len(questions) > 0 else "", \
640
  questions[1] if len(questions) > 1 else "", \
641
  questions[2] if len(questions) > 2 else "", \
642
- formatted_transcript_json, \
643
  summary, \
 
644
  key_moments_html, \
645
  mind_map, \
646
  mind_map_html, \
@@ -652,6 +611,37 @@ def process_youtube_link(password, link):
652
  subject, \
653
  grade
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  def format_transcript_to_html(formatted_transcript):
656
  html_content = ""
657
  for entry in formatted_transcript:
@@ -1088,6 +1078,22 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
1088
  print("key_moments已存在于GCS中")
1089
  key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
1090
  key_moments_json = json.loads(key_moments_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1091
 
1092
  elif source == "drive":
1093
  print("===get_key_moments on drive===")
@@ -1117,20 +1123,21 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
1117
  sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
1118
  user_content = f"""
1119
  請根據 {formatted_simple_transcript} 文本,提取出重點摘要,並給出對應的時間軸
1120
- 重點摘要的「關鍵時刻」加上截圖資訊
1121
  1. 小範圍切出不同段落的相對應時間軸的重點摘要,
1122
  2. 每一小段最多不超過 1/5 的總內容,也就是大約 3~5段的重點(例如五~十分鐘的影片就一段大約1~2分鐘,最多三分鐘,但如果是超過十分鐘的影片,那一小段大約 2~3分鐘,以此類推)
1123
  3. 注意不要遺漏任何一段時間軸的內容 從零秒開始
1124
  4. 如果頭尾的情節不是重點,就併入到附近的段落,特別是打招呼或是介紹人物就是不重要的情節
1125
  5. transcript 逐字稿的集合(要有合理的標點符號),要完整跟原來的一樣,不要省略
1126
  以這種方式分析整個文本,從零秒開始分析,直到結束。這很重要
 
 
1127
 
1128
  並用 JSON 格式返回 key_moments:[{{
1129
  "start": "00:00",
1130
- "end": "00:00",
1131
  "text": "逐字稿的重點摘要",
1132
  "transcript": "逐字稿的集合(要有合理的標點符號),要完整跟原來的一樣,不要省略",
1133
- "images": 截圖的連結們 list
1134
  }}]
1135
  """
1136
  messages = [
@@ -1148,6 +1155,8 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
1148
 
1149
  try:
1150
  response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
 
 
1151
  key_moments = json.loads(response.choices[0].message.content)["key_moments"]
1152
  except Exception as e:
1153
  error_msg = f" {video_id} 關鍵時刻錯誤: {str(e)}"
@@ -1168,6 +1177,28 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
1168
 
1169
  return key_moments
1170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1171
  def get_key_moments_html(key_moments):
1172
  css = """
1173
  <style>
@@ -1380,11 +1411,11 @@ def delete_LLM_content(video_id, kind):
1380
  bucket_name = 'video_ai_assistant'
1381
  file_name = f'{video_id}_{kind}.json'
1382
  blob_name = f"{video_id}/{file_name}"
1383
- # 检查 reading_passage 是否存在
1384
  is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
1385
  if is_file_exists:
1386
  delete_blob(gcs_client, bucket_name, blob_name)
1387
- print("reading_passage已从GCS中删除")
1388
  return gr.update(value="", interactive=False)
1389
 
1390
  def update_LLM_content(video_id, new_content, kind):
@@ -1398,28 +1429,82 @@ def update_LLM_content(video_id, new_content, kind):
1398
  reading_passage_json = {"reading_passage": str(new_content)}
1399
  reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
1400
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
 
1401
  elif kind == "summary":
1402
  summary_json = {"summary": str(new_content)}
1403
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
1404
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
 
1405
  elif kind == "mind_map":
1406
  mind_map_json = {"mind_map": str(new_content)}
1407
  mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
1408
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409
 
1410
  print(f"{kind} 已更新到GCS")
1411
- return gr.update(value=new_content, interactive=False)
1412
 
1413
  def create_LLM_content(video_id, df_string, kind):
1414
  print(f"===create_{kind}===")
 
 
1415
  if kind == "reading_passage":
1416
  content = generate_reading_passage(df_string)
 
1417
  elif kind == "summary":
1418
  content = generate_summarise(df_string)
 
1419
  elif kind == "mind_map":
1420
  content = generate_mind_map(df_string)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1421
 
1422
- update_LLM_content(video_id, content, kind)
1423
  return gr.update(value=content, interactive=False)
1424
 
1425
 
@@ -1565,7 +1650,27 @@ def download_exam_result(content):
1565
  return word_path
1566
 
1567
  # ---- Chatbot ----
1568
- def chat_with_ai(ai_name, password, video_id, trascript, user_message, chat_history, content_subject, content_grade, socratic_mode=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1569
  verify_password(password)
1570
 
1571
  if chat_history is not None and len(chat_history) > 10:
@@ -1578,18 +1683,42 @@ def chat_with_ai(ai_name, password, video_id, trascript, user_message, chat_hist
1578
  ai_client = BEDROCK_CLIENT
1579
  elif ai_name == "groq":
1580
  ai_client = GROQ_CLIENT
 
 
 
 
1581
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1582
  chatbot_config = {
1583
  "video_id": video_id,
1584
- "trascript": trascript,
 
1585
  "content_subject": content_subject,
1586
  "content_grade": content_grade,
1587
  "jutor_chat_key": JUTOR_CHAT_KEY,
1588
  "ai_name": ai_name,
1589
- "ai_client": ai_client
 
1590
  }
1591
- chatbot = Chatbot(chatbot_config)
1592
- response_completion = chatbot.chat(user_message, chat_history, socratic_mode, ai_name)
 
 
 
 
 
1593
 
1594
  try:
1595
  # 更新聊天历史
@@ -1606,7 +1735,7 @@ def chat_with_ai(ai_name, password, video_id, trascript, user_message, chat_hist
1606
  print(f"Error: {e}")
1607
  return "请求失败,请稍后再试!", chat_history
1608
 
1609
- def chat_with_opan_ai_assistant(password, youtube_id, thread_id, trascript, user_message, chat_history, content_subject, content_grade, socratic_mode=False):
1610
  verify_password(password)
1611
 
1612
  # 先計算 user_message 是否超過 500 個字
@@ -1620,43 +1749,34 @@ def chat_with_opan_ai_assistant(password, youtube_id, thread_id, trascript, user
1620
  raise gr.Error(error_msg)
1621
 
1622
  try:
1623
- assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq"
 
 
1624
  client = OPEN_AI_CLIENT
1625
  # 直接安排逐字稿資料 in instructions
1626
- trascript_json = json.loads(trascript)
 
 
 
1627
  # 移除 embed_url, screenshot_path
1628
  for entry in trascript_json:
1629
- entry.pop('embed_url', None)
1630
- entry.pop('screenshot_path', None)
1631
- trascript_text = json.dumps(trascript_json, ensure_ascii=False, indent=2)
1632
-
1633
- instructions = f"""
1634
- 科目:{content_subject}
1635
- 年級:{content_grade}
1636
- 逐字稿資料:{trascript_text}
1637
- -------------------------------------
1638
- 你是一個專業的{content_subject}老師, user 為{content_grade}的學生
1639
- socratic_mode = {socratic_mode}
1640
- if socratic_mode is True,
1641
- - 請用蘇格拉底式的提問方式,引導學生思考,並且給予學生一些提示
1642
- - 一次只問一個問題,字數在100字以內
1643
- - 不要直接給予答案,讓學生自己思考
1644
- 但可以給予一些提示跟引導,例如給予影片的時間軸,讓學生自己去找答案
1645
-
1646
- if socratic_mode is False,
1647
- - 直接回答學生問題,字數在100字以內
1648
-
1649
- rule:
1650
- - 請一定要用繁體中文回答 zh-TW,並用台灣人的口語表達,回答時不用特別說明這是台灣人的語氣,也不用說這是「台語的說法」
1651
- - 不用提到「逐字稿」這個詞,用「內容」代替
1652
- - 如果學生問了一些問題你無法判斷,請告訴學生你無法判斷,並建議學生可以問其他問題
1653
- - 或者你可以反問學生一些問題,幫助學生更好的理解資料,字數在100字以內
1654
- - 如果學生的問題與資料文本無關,請告訴學生你「無法回答超出影片範圍的問題」,並告訴他可以怎麼問什麼樣的問題(一個就好)
1655
- - 只要是參考逐字稿資料,請在回答的最後標註【參考資料:(分):(秒)】
1656
- - 回答範圍一定要在逐字稿資料內,不要引用其他資料,請嚴格執行
1657
- - 並在重複問句後給予學生鼓勵,讓學生有學習的動力
1658
- - 請用 {content_grade} 的學生能懂的方式回答
1659
- """
1660
 
1661
  # 创建线程
1662
  if not thread_id:
@@ -1665,11 +1785,18 @@ def chat_with_opan_ai_assistant(password, youtube_id, thread_id, trascript, user
1665
  else:
1666
  thread = client.beta.threads.retrieve(thread_id)
1667
 
 
 
 
 
 
 
 
1668
  # 向线程添加用户的消息
1669
  client.beta.threads.messages.create(
1670
  thread_id=thread.id,
1671
  role="user",
1672
- content=user_message + "/n (請一定要用繁體中文回答 zh-TW,並用台灣人的禮貌口語表達,回答時不要特別說明這是台灣人的語氣,不用提到「逐字稿」這個詞,用「內容」代替),回答時請用數學符號代替文字(Latex 用 $ 字號 render)"
1673
  )
1674
 
1675
  # 运行助手,生成响应
@@ -1773,7 +1900,7 @@ def poll_run_status(run_id, thread_id, timeout=600, poll_interval=5):
1773
 
1774
  return run.status
1775
 
1776
- def streaming_chat_with_open_ai(user_message, chat_history, password, thread_id, trascript, content_subject, content_grade):
1777
  verify_password(password)
1778
 
1779
  print("===streaming_chat_with_open_ai===")
@@ -1789,50 +1916,29 @@ def streaming_chat_with_open_ai(user_message, chat_history, password, thread_id,
1789
  error_msg = "此次對話超過上限"
1790
  raise gr.Error(error_msg)
1791
 
1792
- # fake data
1793
- socratic_mode = True
1794
-
1795
  try:
1796
- assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq"
 
1797
  client = OPEN_AI_CLIENT
1798
  # 直接安排逐字稿資料 in instructions
1799
- trascript_json = json.loads(trascript)
1800
- # 移除 embed_url, screenshot_path
1801
- for entry in trascript_json:
1802
- entry.pop('embed_url', None)
1803
- entry.pop('screenshot_path', None)
1804
- trascript_text = json.dumps(trascript_json, ensure_ascii=False, indent=2)
1805
  # trascript_text 移除 \n, 空白
1806
  trascript_text = trascript_text.replace("\n", "").replace(" ", "")
1807
 
1808
- instructions = f"""
1809
- 科目:{content_subject}
1810
- 年級:{content_grade}
1811
- 逐字稿資料:{trascript_text}
1812
- -------------------------------------
1813
- 你是一個專業的{content_subject}老師, user 為{content_grade}的學生
1814
- socratic_mode = {socratic_mode}
1815
- if socratic_mode is True,
1816
- - 請用蘇格拉底式的提問方式,引導學生思考,並且給予學生一些提示
1817
- - 一次只問一個問題,字數在100字以內
1818
- - 不要直接給予答案,讓學生自己思考
1819
- - 但可以給予一些提示跟引導,例如給予影片的時間軸,讓學生自己去找答案
1820
-
1821
- if socratic_mode is False,
1822
- - 直接回答學生問題,字數在100字以內
1823
-
1824
- rule:
1825
- - 請一定要用繁體中文回答 zh-TW,並用台灣人的口語表達,回答時不用特別說明這是台灣人的語氣,也不用說這是「台語的說法」
1826
- - 不用提到「逐字稿」這個詞,用「內容」代替
1827
- - 如果學生問了一些問題你無法判斷,請告訴學生你無法判斷,並建議學生可以問其他問題
1828
- - 或者你可以反問學生一些問題,幫助學生更好的理解資料,字數在100字以內
1829
- - 如果學生的問題與資料文本無關,請告訴學生你「無法回答超出影片範圍的問題」,並告訴他可以怎麼問什麼樣的問題(一個就好)
1830
- - 只要是參考逐字稿資料,請在回答的最後標註【參考資料:(分):(秒)】
1831
- - 回答範圍一定要在逐字稿資料內,不要引用其他資料,請嚴格執行
1832
- - 並在重複問句後給予學生鼓勵,讓學生有學習的動力
1833
- - 請用 {content_grade} 的學生能懂的方式回答
1834
- """
1835
 
 
1836
  # 创建线程
1837
  if not thread_id:
1838
  thread = client.beta.threads.create()
@@ -1933,18 +2039,22 @@ def init_params(text, request: gr.Request):
1933
 
1934
  return admin, reading_passage_admin, summary_admin, see_detail, password_text, youtube_link
1935
 
1936
- def update_state(content_subject, content_grade, trascript, question_1, question_2, question_3):
1937
  # inputs=[content_subject, content_grade, df_string_output],
1938
  # outputs=[content_subject_state, content_grade_state, trascript_state]
1939
  content_subject_state = content_subject
1940
  content_grade_state = content_grade
1941
- trascript_state = trascript
 
 
 
1942
  streaming_chat_thread_id_state = create_thread_id()
1943
  ai_chatbot_question_1 = question_1
1944
  ai_chatbot_question_2 = question_2
1945
  ai_chatbot_question_3 = question_3
1946
 
1947
- return content_subject_state, content_grade_state, trascript_state, streaming_chat_thread_id_state, \
 
1948
  ai_chatbot_question_1, ai_chatbot_question_2, ai_chatbot_question_3
1949
 
1950
 
@@ -2016,7 +2126,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2016
  with gr.Row() as admin:
2017
  password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
2018
  youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
2019
- video_id = gr.Textbox(label="video_id", visible=False)
2020
  # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
2021
  # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
2022
  user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
@@ -2025,6 +2135,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2025
  content_subject_state = gr.State() # 使用 gr.State 存储 content_subject
2026
  content_grade_state = gr.State() # 使用 gr.State 存储 content_grade
2027
  trascript_state = gr.State() # 使用 gr.State 存储 trascript
 
2028
  streaming_chat_thread_id_state = gr.State() # 使用 gr.State 存储 streaming_chat_thread_id
2029
  with gr.Tab("AI小精靈"):
2030
  with gr.Row():
@@ -2042,12 +2153,12 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2042
  btn_3 = gr.Button("問題一")
2043
  gr.Markdown("### 重新生成問題")
2044
  btn_create_question = gr.Button("生成其他問題", variant="primary")
2045
- openai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath")
2046
  with gr.Row():
2047
  msg = gr.Textbox(label="訊息",scale=3)
2048
  send_button = gr.Button("送出", variant="primary", scale=1)
2049
  with gr.Tab("飛特音速"):
2050
- additional_inputs = [password, streaming_chat_thread_id_state, trascript_state, content_subject_state, content_grade_state]
2051
  streaming_chat = gr.ChatInterface(
2052
  fn=streaming_chat_with_open_ai,
2053
  additional_inputs=additional_inputs,
@@ -2066,11 +2177,10 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2066
  ai_chatbot_question_1 = gr.Button("問題一")
2067
  ai_chatbot_question_2 = gr.Button("問題一")
2068
  ai_chatbot_question_3 = gr.Button("問題一")
2069
- ai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath")
2070
  with gr.Row():
2071
- ai_msg = gr.Textbox(label="Message",scale=3)
2072
- ai_send_button = gr.Button("Send", variant="primary",scale=1)
2073
-
2074
  with gr.Tab("文章模式"):
2075
  with gr.Row() as reading_passage_admin:
2076
  reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
@@ -2094,7 +2204,6 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2094
  with gr.Tab("關鍵時刻"):
2095
  with gr.Row():
2096
  key_moments_html = gr.HTML(value="")
2097
-
2098
  with gr.Tab("教學備課"):
2099
  with gr.Row():
2100
  content_subject = gr.Dropdown(label="選擇主題", choices=["數學", "自然", "國文", "英文", "社會","物理", "化學", "生物", "地理", "歷史", "公民"], value="", visible=False)
@@ -2182,8 +2291,33 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2182
  # metacognition_content_btn = gr.Button("生成後設認知問題")
2183
 
2184
  with gr.Accordion("See Details", open=False) as see_details:
2185
- with gr.Tab("本文"):
2186
- df_string_output = gr.Textbox(lines=40, label="Data Text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2187
  with gr.Tab("逐字稿"):
2188
  simple_html_content = gr.HTML(label="Simple Transcript")
2189
  with gr.Tab("圖文"):
@@ -2206,7 +2340,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2206
  # OPENAI ASSISTANT CHATBOT 模式
2207
  send_button.click(
2208
  chat_with_opan_ai_assistant,
2209
- inputs=[password, video_id, thread_id, df_string_output, msg, chatbot, content_subject, content_grade, socratic_mode_btn],
2210
  outputs=[msg, chatbot, thread_id]
2211
  )
2212
  openai_chatbot_audio_input.change(
@@ -2215,9 +2349,9 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2215
  outputs=[msg]
2216
  )
2217
  # OPENAI ASSISTANT CHATBOT 連接按鈕點擊事件
2218
- btn_1_chat_with_opan_ai_assistant_input =[password, video_id, thread_id, df_string_output, btn_1, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2219
- btn_2_chat_with_opan_ai_assistant_input =[password, video_id, thread_id, df_string_output, btn_2, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2220
- btn_3_chat_with_opan_ai_assistant_input =[password, video_id, thread_id, df_string_output, btn_3, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2221
  btn_1.click(
2222
  chat_with_opan_ai_assistant,
2223
  inputs=btn_1_chat_with_opan_ai_assistant_input,
@@ -2242,13 +2376,13 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2242
  # ai_chatbot 模式
2243
  ai_send_button.click(
2244
  chat_with_ai,
2245
- inputs=[ai_name, password, video_id, df_string_output, ai_msg, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn],
2246
  outputs=[ai_msg, ai_chatbot]
2247
  )
2248
  # ai_chatbot 连接按钮点击事件
2249
- ai_chatbot_question_1_chat_with_ai_input =[ai_name, password, video_id, df_string_output, ai_chatbot_question_1, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2250
- ai_chatbot_question_2_chat_with_ai_input =[ai_name, password, video_id, df_string_output, ai_chatbot_question_2, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2251
- ai_chatbot_question_3_chat_with_ai_input =[ai_name, password, video_id, df_string_output, ai_chatbot_question_3, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2252
  ai_chatbot_question_1.click(
2253
  chat_with_ai,
2254
  inputs=ai_chatbot_question_1_chat_with_ai_input,
@@ -2272,11 +2406,13 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2272
  process_youtube_link_inputs = [password, youtube_link]
2273
  process_youtube_link_outputs = [
2274
  video_id,
 
2275
  btn_1,
2276
  btn_2,
2277
  btn_3,
2278
  df_string_output,
2279
  df_summarise,
 
2280
  key_moments_html,
2281
  mind_map,
2282
  mind_map_html,
@@ -2292,6 +2428,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2292
  content_subject,
2293
  content_grade,
2294
  df_string_output,
 
2295
  btn_1,
2296
  btn_2,
2297
  btn_3
@@ -2299,7 +2436,8 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2299
  update_state_outputs = [
2300
  content_subject_state,
2301
  content_grade_state,
2302
- trascript_state,
 
2303
  streaming_chat_thread_id_state,
2304
  ai_chatbot_question_1,
2305
  ai_chatbot_question_2,
@@ -2374,6 +2512,72 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2374
  outputs=[df_summarise]
2375
  )
2376
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2377
  # 教師版
2378
  worksheet_content_btn.click(
2379
  get_ai_content,
 
72
 
73
  TRANSCRIPTS = []
74
  CURRENT_INDEX = 0
 
75
 
76
  OPEN_AI_CLIENT = OpenAI(api_key=OPEN_AI_KEY)
77
  GROQ_CLIENT = Groq(api_key=GROQ_API_KEY)
 
372
  for language in languages:
373
  try:
374
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
375
+ print("===transcript===")
376
+ print(transcript)
377
+ print("===transcript===")
378
  return transcript # 成功獲取字幕,直接返回結果
379
  except NoTranscriptFound:
380
  continue # 當前語言的字幕沒有找到,繼續嘗試下一個語言
 
415
  chunk_path = f"{OUTPUT_PATH}/{video_id}_part_{i}.{codec_name}"
416
  chunk.export(chunk_path, format=codec_name)
417
 
418
+ try:
419
+ with open(chunk_path, "rb") as chunk_file:
420
+ response = OPEN_AI_CLIENT.audio.transcriptions.create(
421
+ model="whisper-1",
422
+ file=chunk_file,
423
+ response_format="verbose_json",
424
+ timestamp_granularities=["segment"],
425
+ prompt="Transcribe the following audio file. if content is chinese, please using 'language: zh-TW' ",
426
+ )
427
+
428
+ # Adjusting the timestamps for the chunk based on its position in the full audio
429
+ adjusted_segments = [{
430
+ 'text': segment['text'],
431
+ 'start': math.ceil(segment['start'] + start_time / 1000.0), # Converting milliseconds to seconds
432
+ 'end': math.ceil(segment['end'] + start_time / 1000.0),
433
+ 'duration': math.ceil(segment['end'] - segment['start'])
434
+ } for segment in response.segments]
435
+
436
+ transcription.extend(adjusted_segments)
437
+ except Exception as e:
438
+ print(f"Error processing chunk {i}: {str(e)}")
439
 
440
  # Remove temporary chunk files after processing
441
  os.remove(chunk_path)
442
 
443
  return transcription
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  def process_transcript_and_screenshots_on_gcs(video_id):
446
  print("====process_transcript_and_screenshots_on_gcs====")
447
  # GCS
 
454
  is_new_transcript = False
455
  is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
456
  if not is_transcript_exists:
457
+ print("逐字稿文件不存在于GCS中,重新建立")
458
  # 从YouTube获取逐字稿并上传
459
  try:
460
  transcript = get_transcript(video_id)
 
530
  # 使用 YouTube API 获取逐字稿
531
  # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
532
  video_id = extract_youtube_id(link)
 
 
533
 
534
  try:
 
535
  transcript = process_transcript_and_screenshots_on_gcs(video_id)
536
  except Exception as e:
537
  error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
 
539
  print(error_msg)
540
  raise gr.Error(error_msg)
541
 
542
+ original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
543
  formatted_transcript = []
544
  formatted_simple_transcript =[]
 
545
  for entry in transcript:
546
  start_time = format_seconds_to_time(entry['start'])
547
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
548
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
549
  img_file_id = entry['img_file_id']
 
 
 
550
  screenshot_path = img_file_id
551
  line = {
552
  "start_time": start_time,
 
563
  "text": entry['text']
564
  }
565
  formatted_simple_transcript.append(simple_line)
 
566
 
567
  global TRANSCRIPTS
568
  TRANSCRIPTS = formatted_transcript
 
570
  # 基于逐字稿生成其他所需的输出
571
  source = "gcs"
572
  questions = get_questions(video_id, formatted_simple_transcript, source)
573
+ questions_json = json.dumps(questions, ensure_ascii=False, indent=2)
574
  summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
575
  summary = summary_json["summary"]
576
  key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
577
  key_moments = key_moments_json["key_moments"]
578
+ key_moments_text = json.dumps(key_moments, ensure_ascii=False, indent=2)
579
  key_moments_html = get_key_moments_html(key_moments)
580
  html_content = format_transcript_to_html(formatted_transcript)
581
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
 
593
 
594
  # 确保返回与 UI 组件预期匹配的输出
595
  return video_id, \
596
+ questions_json, \
597
  questions[0] if len(questions) > 0 else "", \
598
  questions[1] if len(questions) > 1 else "", \
599
  questions[2] if len(questions) > 2 else "", \
600
+ original_transcript, \
601
  summary, \
602
+ key_moments_text, \
603
  key_moments_html, \
604
  mind_map, \
605
  mind_map_html, \
 
611
  subject, \
612
  grade
613
 
614
+ def create_formatted_simple_transcript(transcript):
615
+ formatted_simple_transcript = []
616
+ for entry in transcript:
617
+ start_time = format_seconds_to_time(entry['start'])
618
+ end_time = format_seconds_to_time(entry['start'] + entry['duration'])
619
+ line = {
620
+ "start_time": start_time,
621
+ "end_time": end_time,
622
+ "text": entry['text']
623
+ }
624
+ formatted_simple_transcript.append(line)
625
+ return formatted_simple_transcript
626
+
627
+ def create_formatted_transcript(video_id, transcript):
628
+ formatted_transcript = []
629
+ for entry in transcript:
630
+ start_time = format_seconds_to_time(entry['start'])
631
+ end_time = format_seconds_to_time(entry['start'] + entry['duration'])
632
+ embed_url = get_embedded_youtube_link(video_id, entry['start'])
633
+ img_file_id = entry['img_file_id']
634
+ screenshot_path = img_file_id
635
+ line = {
636
+ "start_time": start_time,
637
+ "end_time": end_time,
638
+ "text": entry['text'],
639
+ "embed_url": embed_url,
640
+ "screenshot_path": screenshot_path
641
+ }
642
+ formatted_transcript.append(line)
643
+ return formatted_transcript
644
+
645
  def format_transcript_to_html(formatted_transcript):
646
  html_content = ""
647
  for entry in formatted_transcript:
 
1078
  print("key_moments已存在于GCS中")
1079
  key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
1080
  key_moments_json = json.loads(key_moments_text)
1081
+ # 檢查 key_moments 是否有 keywords
1082
+ print("===檢查 key_moments 是否有 keywords===")
1083
+ has_keywords_added = False
1084
+ for key_moment in key_moments_json["key_moments"]:
1085
+ if "keywords" not in key_moment:
1086
+ transcript = key_moment["transcript"]
1087
+ key_moment["keywords"] = generate_key_moments_keywords(transcript)
1088
+ print("===keywords===")
1089
+ print(key_moment["keywords"])
1090
+ print("===keywords===")
1091
+ has_keywords_added = True
1092
+ if has_keywords_added:
1093
+ key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
1094
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
1095
+ key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
1096
+ key_moments_json = json.loads(key_moments_text)
1097
 
1098
  elif source == "drive":
1099
  print("===get_key_moments on drive===")
 
1123
  sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
1124
  user_content = f"""
1125
  請根據 {formatted_simple_transcript} 文本,提取出重點摘要,並給出對應的時間軸
 
1126
  1. 小範圍切出不同段落的相對應時間軸的重點摘要,
1127
  2. 每一小段最多不超過 1/5 的總內容,也就是大約 3~5段的重點(例如五~十分鐘的影片就一段大約1~2分鐘,最多三分鐘,但如果是超過十分鐘的影片,那一小段大約 2~3分鐘,以此類推)
1128
  3. 注意不要遺漏任何一段時間軸的內容 從零秒開始
1129
  4. 如果頭尾的情節不是重點,就併入到附近的段落,特別是打招呼或是介紹人物就是不重要的情節
1130
  5. transcript 逐字稿的集合(要有合理的標點符號),要完整跟原來的一樣,不要省略
1131
  以這種方式分析整個文本,從零秒開始分析,直到結束。這很重要
1132
+ 6. 關鍵字從transcript extract to keyword,保留專家名字、專業術語、年份、數字、期刊名稱、地名、數學公式
1133
+ 7. text, transcript, keywords please use or transfer zh-TW, it's very important
1134
 
1135
  並用 JSON 格式返回 key_moments:[{{
1136
  "start": "00:00",
1137
+ "end": "01:00",
1138
  "text": "逐字稿的重點摘要",
1139
  "transcript": "逐字稿的集合(要有合理的標點符號),要完整跟原來的一樣,不要省略",
1140
+ "keywords": ["關鍵字", "關鍵字"]
1141
  }}]
1142
  """
1143
  messages = [
 
1155
 
1156
  try:
1157
  response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
1158
+ print("===response===")
1159
+ print(dict(response))
1160
  key_moments = json.loads(response.choices[0].message.content)["key_moments"]
1161
  except Exception as e:
1162
  error_msg = f" {video_id} 關鍵時刻錯誤: {str(e)}"
 
1177
 
1178
  return key_moments
1179
 
1180
+ def generate_key_moments_keywords(transcript):
1181
+ system_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請根據以下文本提取關鍵字"
1182
+ user_content = f"""transcript extract to keyword
1183
+ 保留專家名字、專業術語、年份、數字、期刊名稱、地名、數學公式、數學表示式、物理化學符號,
1184
+ 不用給上下文,直接給出關鍵字,使用 zh-TW,用逗號分隔, example: 關鍵字1, 關鍵字2
1185
+ transcript:{transcript}
1186
+ """
1187
+ messages = [
1188
+ {"role": "system", "content": system_content},
1189
+ {"role": "user", "content": user_content}
1190
+ ]
1191
+ request_payload = {
1192
+ "model": "gpt-4-1106-preview",
1193
+ "messages": messages,
1194
+ "max_tokens": 100,
1195
+ }
1196
+
1197
+ response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
1198
+ keywords = response.choices[0].message.content.strip().split(", ")
1199
+
1200
+ return keywords
1201
+
1202
  def get_key_moments_html(key_moments):
1203
  css = """
1204
  <style>
 
1411
  bucket_name = 'video_ai_assistant'
1412
  file_name = f'{video_id}_{kind}.json'
1413
  blob_name = f"{video_id}/{file_name}"
1414
+ # 检查 file 是否存在
1415
  is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
1416
  if is_file_exists:
1417
  delete_blob(gcs_client, bucket_name, blob_name)
1418
+ print(f"{file_name}已从GCS中删除")
1419
  return gr.update(value="", interactive=False)
1420
 
1421
  def update_LLM_content(video_id, new_content, kind):
 
1429
  reading_passage_json = {"reading_passage": str(new_content)}
1430
  reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
1431
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
1432
+ updated_content = reading_passage_text
1433
  elif kind == "summary":
1434
  summary_json = {"summary": str(new_content)}
1435
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
1436
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
1437
+ updated_content = summary_text
1438
  elif kind == "mind_map":
1439
  mind_map_json = {"mind_map": str(new_content)}
1440
  mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
1441
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
1442
+ updated_content = mind_map_text
1443
+ elif kind == "key_moments":
1444
+ # from update_LLM_btn -> new_content is a string
1445
+ # create_LLM_content -> new_content is a list
1446
+ if isinstance(new_content, str):
1447
+ key_moments_list = json.loads(new_content)
1448
+ else:
1449
+ key_moments_list = new_content
1450
+ key_moments_json = {"key_moments": key_moments_list}
1451
+ key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
1452
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
1453
+ updated_content = key_moments_text
1454
+ elif kind == "transcript":
1455
+ if isinstance(new_content, str):
1456
+ transcript_json = json.loads(new_content)
1457
+ else:
1458
+ transcript_json = new_content
1459
+ transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
1460
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
1461
+ updated_content = transcript_text
1462
+ elif kind == "questions":
1463
+ # from update_LLM_btn -> new_content is a string
1464
+ # create_LLM_content -> new_content is a list
1465
+ if isinstance(new_content, str):
1466
+ questions_json = json.loads(new_content)
1467
+ else:
1468
+ questions_json = new_content
1469
+ questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
1470
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
1471
+ updated_content = questions_text
1472
 
1473
  print(f"{kind} 已更新到GCS")
1474
+ return gr.update(value=updated_content, interactive=False)
1475
 
1476
  def create_LLM_content(video_id, df_string, kind):
1477
  print(f"===create_{kind}===")
1478
+ print(f"video_id: {video_id}")
1479
+
1480
  if kind == "reading_passage":
1481
  content = generate_reading_passage(df_string)
1482
+ update_LLM_content(video_id, content, kind)
1483
  elif kind == "summary":
1484
  content = generate_summarise(df_string)
1485
+ update_LLM_content(video_id, content, kind)
1486
  elif kind == "mind_map":
1487
  content = generate_mind_map(df_string)
1488
+ update_LLM_content(video_id, content, kind)
1489
+ elif kind == "key_moments":
1490
+ if isinstance(df_string, str):
1491
+ transcript = json.loads(df_string)
1492
+ else:
1493
+ transcript = df_string
1494
+ formatted_simple_transcript = create_formatted_simple_transcript(transcript)
1495
+ formatted_transcript = create_formatted_transcript(video_id, transcript)
1496
+ content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
1497
+ update_LLM_content(video_id, content, kind)
1498
+ content = json.dumps(content, ensure_ascii=False, indent=2)
1499
+ elif kind == "transcript":
1500
+ content = process_transcript_and_screenshots_on_gcs(video_id)
1501
+ update_LLM_content(video_id, content, kind)
1502
+ content = json.dumps(content, ensure_ascii=False, indent=2)
1503
+ elif kind == "questions":
1504
+ content = generate_questions(df_string)
1505
+ update_LLM_content(video_id, content, kind)
1506
+ content = json.dumps(content, ensure_ascii=False, indent=2)
1507
 
 
1508
  return gr.update(value=content, interactive=False)
1509
 
1510
 
 
1650
  return word_path
1651
 
1652
  # ---- Chatbot ----
1653
+ def get_instructions(content_subject, content_grade, key_moments):
1654
+ instructions = f"""
1655
+ subject: {content_subject}
1656
+ grade: {content_grade}
1657
+ context: {key_moments}
1658
+ Assistant Role: you are a {content_subject} teacher
1659
+ User Role: {content_grade} th-grade student.
1660
+ Method: Socratic style, guide thinking, no direct answers. this is very important, please be seriously following.
1661
+ Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
1662
+ Response:
1663
+ - Single question, under 100 characters
1664
+ - include math symbols (use LaTeX $ to cover before and after)
1665
+ - hint with video timestamp which format 【參考:00:00:00】.
1666
+ - Sometimes encourage user by Taiwanese style with relaxing atmosphere.
1667
+ - if user ask questions not include in context,
1668
+ - just tell them to ask the question in context and give them example question.
1669
+ Restrictions: Answer within video content, no external references
1670
+ """
1671
+ return instructions
1672
+
1673
+ def chat_with_ai(ai_name, password, video_id, trascript_state, key_moments, user_message, chat_history, content_subject, content_grade, socratic_mode=False):
1674
  verify_password(password)
1675
 
1676
  if chat_history is not None and len(chat_history) > 10:
 
1683
  ai_client = BEDROCK_CLIENT
1684
  elif ai_name == "groq":
1685
  ai_client = GROQ_CLIENT
1686
+ if isinstance(trascript_state, str):
1687
+ simple_transcript = json.loads(trascript_state)
1688
+ else:
1689
+ simple_transcript = trascript_state
1690
 
1691
+ if isinstance(key_moments, str):
1692
+ key_moments_json = json.loads(key_moments)
1693
+ else:
1694
+ key_moments_json = key_moments
1695
+ # key_moments_json remove images
1696
+ for moment in key_moments_json:
1697
+ moment.pop('images', None)
1698
+ moment.pop('end', None)
1699
+ moment.pop('text', None)
1700
+ key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
1701
+
1702
+ instructions = get_instructions(content_subject, content_grade, key_moments_text)
1703
+
1704
  chatbot_config = {
1705
  "video_id": video_id,
1706
+ "transcript": simple_transcript,
1707
+ "key_moments": key_moments,
1708
  "content_subject": content_subject,
1709
  "content_grade": content_grade,
1710
  "jutor_chat_key": JUTOR_CHAT_KEY,
1711
  "ai_name": ai_name,
1712
+ "ai_client": ai_client,
1713
+ "instructions": instructions
1714
  }
1715
+
1716
+ try:
1717
+ chatbot = Chatbot(chatbot_config)
1718
+ response_completion = chatbot.chat(user_message, chat_history, socratic_mode, ai_name)
1719
+ except Exception as e:
1720
+ print(f"Error: {e}")
1721
+ response_completion = "學習精靈有點累,請稍後再試!"
1722
 
1723
  try:
1724
  # 更新聊天历史
 
1735
  print(f"Error: {e}")
1736
  return "请求失败,请稍后再试!", chat_history
1737
 
1738
+ def chat_with_opan_ai_assistant(password, youtube_id, thread_id, trascript_state, key_moments, user_message, chat_history, content_subject, content_grade, socratic_mode=False):
1739
  verify_password(password)
1740
 
1741
  # 先計算 user_message 是否超過 500 個字
 
1749
  raise gr.Error(error_msg)
1750
 
1751
  try:
1752
+ assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq" #GPT 4 turbo
1753
+ # assistant_id = "asst_5SaUElqvL3U0ybSi9PRM8x3P" #GPT 3.5 turbo
1754
+
1755
  client = OPEN_AI_CLIENT
1756
  # 直接安排逐字稿資料 in instructions
1757
+ if isinstance(trascript_state, str):
1758
+ trascript_json = json.loads(trascript_state)
1759
+ else:
1760
+ trascript_json = trascript_state
1761
  # 移除 embed_url, screenshot_path
1762
  for entry in trascript_json:
1763
+ entry.pop('end_time', None)
1764
+ trascript_text = json.dumps(trascript_json, ensure_ascii=False)
1765
+
1766
+ if isinstance(key_moments, str):
1767
+ key_moments_json = json.loads(key_moments)
1768
+ else:
1769
+ key_moments_json = key_moments
1770
+ # key_moments_json remove images
1771
+ for moment in key_moments_json:
1772
+ moment.pop('images', None)
1773
+ moment.pop('end', None)
1774
+ moment.pop('text', None)
1775
+ key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
1776
+
1777
+ instructions = get_instructions(content_subject, content_grade, key_moments_text)
1778
+ print("=== instructions ===")
1779
+ print(instructions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1780
 
1781
  # 创建线程
1782
  if not thread_id:
 
1785
  else:
1786
  thread = client.beta.threads.retrieve(thread_id)
1787
 
1788
+ user_msg_note = """\n (請一定要用繁體中文回答 zh-TW,
1789
+ 請嚴格遵循instructions,擔任一位蘇格拉底家教,
1790
+ 並用台灣人的禮貌口語表達,回答時不要特別說明這是台灣人的語氣,
1791
+ 不用提到「逐字稿」這個詞,用「內容」代替),
1792
+ 回答時請用數學符號代替文字(Latex 用 $ 字號 render)
1793
+ """
1794
+ user_msg_note = user_msg_note.replace(" ","").replace("\n","")
1795
  # 向线程添加用户的消息
1796
  client.beta.threads.messages.create(
1797
  thread_id=thread.id,
1798
  role="user",
1799
+ content=user_message + user_msg_note
1800
  )
1801
 
1802
  # 运行助手,生成响应
 
1900
 
1901
  return run.status
1902
 
1903
+ def streaming_chat_with_open_ai(user_message, chat_history, password, thread_id, trascript, key_moments, content_subject, content_grade):
1904
  verify_password(password)
1905
 
1906
  print("===streaming_chat_with_open_ai===")
 
1916
  error_msg = "此次對話超過上限"
1917
  raise gr.Error(error_msg)
1918
 
 
 
 
1919
  try:
1920
+ assistant_id = "asst_kmvZLNkDUYaNkMNtZEAYxyPq" #GPT 4 turbo
1921
+ # assistant_id = "asst_5SaUElqvL3U0ybSi9PRM8x3P" #GPT 3.5 turbo
1922
  client = OPEN_AI_CLIENT
1923
  # 直接安排逐字稿資料 in instructions
1924
+ if isinstance(trascript, str):
1925
+ trascript_json = json.loads(trascript)
1926
+ else:
1927
+ trascript_json = trascript
1928
+ trascript_text = json.dumps(trascript_json, ensure_ascii=False)
 
1929
  # trascript_text 移除 \n, 空白
1930
  trascript_text = trascript_text.replace("\n", "").replace(" ", "")
1931
 
1932
+ if isinstance(key_moments, str):
1933
+ key_moments_json = json.loads(key_moments)
1934
+ else:
1935
+ key_moments_json = key_moments
1936
+ # key_moments_json remove images
1937
+ for moment in key_moments_json:
1938
+ moment.pop('images', None)
1939
+ key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1940
 
1941
+ instructions = get_instructions(content_subject, content_grade, key_moments_text)
1942
  # 创建线程
1943
  if not thread_id:
1944
  thread = client.beta.threads.create()
 
2039
 
2040
  return admin, reading_passage_admin, summary_admin, see_detail, password_text, youtube_link
2041
 
2042
+ def update_state(content_subject, content_grade, trascript, key_moments, question_1, question_2, question_3):
2043
  # inputs=[content_subject, content_grade, df_string_output],
2044
  # outputs=[content_subject_state, content_grade_state, trascript_state]
2045
  content_subject_state = content_subject
2046
  content_grade_state = content_grade
2047
+ trascript_json = json.loads(trascript)
2048
+ formatted_simple_transcript = create_formatted_simple_transcript(trascript_json)
2049
+ trascript_state = formatted_simple_transcript
2050
+ key_moments_state = key_moments
2051
  streaming_chat_thread_id_state = create_thread_id()
2052
  ai_chatbot_question_1 = question_1
2053
  ai_chatbot_question_2 = question_2
2054
  ai_chatbot_question_3 = question_3
2055
 
2056
+ return content_subject_state, content_grade_state, trascript_state, key_moments_state, \
2057
+ streaming_chat_thread_id_state, \
2058
  ai_chatbot_question_1, ai_chatbot_question_2, ai_chatbot_question_3
2059
 
2060
 
 
2126
  with gr.Row() as admin:
2127
  password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
2128
  youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
2129
+ video_id = gr.Textbox(label="video_id", visible=True)
2130
  # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
2131
  # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
2132
  user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
 
2135
  content_subject_state = gr.State() # 使用 gr.State 存储 content_subject
2136
  content_grade_state = gr.State() # 使用 gr.State 存储 content_grade
2137
  trascript_state = gr.State() # 使用 gr.State 存储 trascript
2138
+ key_moments_state = gr.State() # 使用 gr.State 存储 key_moments
2139
  streaming_chat_thread_id_state = gr.State() # 使用 gr.State 存储 streaming_chat_thread_id
2140
  with gr.Tab("AI小精靈"):
2141
  with gr.Row():
 
2153
  btn_3 = gr.Button("問題一")
2154
  gr.Markdown("### 重新生成問題")
2155
  btn_create_question = gr.Button("生成其他問題", variant="primary")
2156
+ openai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath", max_length=60)
2157
  with gr.Row():
2158
  msg = gr.Textbox(label="訊息",scale=3)
2159
  send_button = gr.Button("送出", variant="primary", scale=1)
2160
  with gr.Tab("飛特音速"):
2161
+ additional_inputs = [password, streaming_chat_thread_id_state, trascript_state, key_moments_state, content_subject_state, content_grade_state]
2162
  streaming_chat = gr.ChatInterface(
2163
  fn=streaming_chat_with_open_ai,
2164
  additional_inputs=additional_inputs,
 
2177
  ai_chatbot_question_1 = gr.Button("問題一")
2178
  ai_chatbot_question_2 = gr.Button("問題一")
2179
  ai_chatbot_question_3 = gr.Button("問題一")
2180
+ ai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath", max_length=60)
2181
  with gr.Row():
2182
+ ai_msg = gr.Textbox(label="訊息輸入",scale=3)
2183
+ ai_send_button = gr.Button("送出", variant="primary",scale=1)
 
2184
  with gr.Tab("文章模式"):
2185
  with gr.Row() as reading_passage_admin:
2186
  reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
 
2204
  with gr.Tab("關鍵時刻"):
2205
  with gr.Row():
2206
  key_moments_html = gr.HTML(value="")
 
2207
  with gr.Tab("教學備課"):
2208
  with gr.Row():
2209
  content_subject = gr.Dropdown(label="選擇主題", choices=["數學", "自然", "國文", "英文", "社會","物理", "化學", "生物", "地理", "歷史", "公民"], value="", visible=False)
 
2291
  # metacognition_content_btn = gr.Button("生成後設認知問題")
2292
 
2293
  with gr.Accordion("See Details", open=False) as see_details:
2294
+ with gr.Tab("逐字稿本文"):
2295
+ with gr.Row() as transcript_admmin:
2296
+ transcript_kind = gr.Textbox(value="transcript", show_label=False)
2297
+ transcript_edit_button = gr.Button("編輯", size="sm", variant="primary")
2298
+ transcript_update_button = gr.Button("更新", size="sm", variant="primary")
2299
+ transcript_delete_button = gr.Button("刪除", size="sm", variant="primary")
2300
+ transcript_create_button = gr.Button("建立", size="sm", variant="primary")
2301
+ with gr.Row():
2302
+ df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True)
2303
+ with gr.Tab("關鍵時刻本文"):
2304
+ with gr.Row() as key_moments_admin:
2305
+ key_moments_kind = gr.Textbox(value="key_moments", show_label=False)
2306
+ key_moments_edit_button = gr.Button("編輯", size="sm", variant="primary")
2307
+ key_moments_update_button = gr.Button("更新", size="sm", variant="primary")
2308
+ key_moments_delete_button = gr.Button("刪除", size="sm", variant="primary")
2309
+ key_moments_create_button = gr.Button("建立", size="sm", variant="primary")
2310
+ with gr.Row():
2311
+ key_moments = gr.Textbox(label="Key Moments", lines=40, interactive=False, show_copy_button=True)
2312
+ with gr.Tab("問題本文"):
2313
+ with gr.Row() as question_list_admin:
2314
+ questions_kind = gr.Textbox(value="questions", show_label=False)
2315
+ questions_edit_button = gr.Button("編輯", size="sm", variant="primary")
2316
+ questions_update_button = gr.Button("更新", size="sm", variant="primary")
2317
+ questions_delete_button = gr.Button("刪除", size="sm", variant="primary")
2318
+ questions_create_button = gr.Button("建立", size="sm", variant="primary")
2319
+ with gr.Row():
2320
+ questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
2321
  with gr.Tab("逐字稿"):
2322
  simple_html_content = gr.HTML(label="Simple Transcript")
2323
  with gr.Tab("圖文"):
 
2340
  # OPENAI ASSISTANT CHATBOT 模式
2341
  send_button.click(
2342
  chat_with_opan_ai_assistant,
2343
+ inputs=[password, video_id, thread_id, trascript_state, key_moments, msg, chatbot, content_subject, content_grade, socratic_mode_btn],
2344
  outputs=[msg, chatbot, thread_id]
2345
  )
2346
  openai_chatbot_audio_input.change(
 
2349
  outputs=[msg]
2350
  )
2351
  # OPENAI ASSISTANT CHATBOT 連接按鈕點擊事件
2352
+ btn_1_chat_with_opan_ai_assistant_input =[password, video_id, thread_id, trascript_state, key_moments, btn_1, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2353
+ btn_2_chat_with_opan_ai_assistant_input =[password, video_id, thread_id, trascript_state, key_moments, btn_2, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2354
+ btn_3_chat_with_opan_ai_assistant_input =[password, video_id, thread_id, trascript_state, key_moments, btn_3, chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2355
  btn_1.click(
2356
  chat_with_opan_ai_assistant,
2357
  inputs=btn_1_chat_with_opan_ai_assistant_input,
 
2376
  # ai_chatbot 模式
2377
  ai_send_button.click(
2378
  chat_with_ai,
2379
+ inputs=[ai_name, password, video_id, trascript_state, key_moments, ai_msg, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn],
2380
  outputs=[ai_msg, ai_chatbot]
2381
  )
2382
  # ai_chatbot 连接按钮点击事件
2383
+ ai_chatbot_question_1_chat_with_ai_input =[ai_name, password, video_id, trascript_state, key_moments, ai_chatbot_question_1, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2384
+ ai_chatbot_question_2_chat_with_ai_input =[ai_name, password, video_id, trascript_state, key_moments, ai_chatbot_question_2, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2385
+ ai_chatbot_question_3_chat_with_ai_input =[ai_name, password, video_id, trascript_state, key_moments, ai_chatbot_question_3, ai_chatbot, content_subject, content_grade, ai_chatbot_socratic_mode_btn]
2386
  ai_chatbot_question_1.click(
2387
  chat_with_ai,
2388
  inputs=ai_chatbot_question_1_chat_with_ai_input,
 
2406
  process_youtube_link_inputs = [password, youtube_link]
2407
  process_youtube_link_outputs = [
2408
  video_id,
2409
+ questions_json,
2410
  btn_1,
2411
  btn_2,
2412
  btn_3,
2413
  df_string_output,
2414
  df_summarise,
2415
+ key_moments,
2416
  key_moments_html,
2417
  mind_map,
2418
  mind_map_html,
 
2428
  content_subject,
2429
  content_grade,
2430
  df_string_output,
2431
+ key_moments,
2432
  btn_1,
2433
  btn_2,
2434
  btn_3
 
2436
  update_state_outputs = [
2437
  content_subject_state,
2438
  content_grade_state,
2439
+ trascript_state,
2440
+ key_moments_state,
2441
  streaming_chat_thread_id_state,
2442
  ai_chatbot_question_1,
2443
  ai_chatbot_question_2,
 
2512
  outputs=[df_summarise]
2513
  )
2514
 
2515
+ # transcript event
2516
+ transcript_create_button.click(
2517
+ create_LLM_content,
2518
+ inputs=[video_id, df_string_output, transcript_kind],
2519
+ outputs=[df_string_output]
2520
+ )
2521
+ transcript_delete_button.click(
2522
+ delete_LLM_content,
2523
+ inputs=[video_id, transcript_kind],
2524
+ outputs=[df_string_output]
2525
+ )
2526
+ transcript_edit_button.click(
2527
+ enable_edit_mode,
2528
+ inputs=[],
2529
+ outputs=[df_string_output]
2530
+ )
2531
+ transcript_update_button.click(
2532
+ update_LLM_content,
2533
+ inputs=[video_id, df_string_output, transcript_kind],
2534
+ outputs=[df_string_output]
2535
+ )
2536
+
2537
+ # key_moments event
2538
+ key_moments_create_button.click(
2539
+ create_LLM_content,
2540
+ inputs=[video_id, df_string_output, key_moments_kind],
2541
+ outputs=[key_moments]
2542
+ )
2543
+ key_moments_delete_button.click(
2544
+ delete_LLM_content,
2545
+ inputs=[video_id, key_moments_kind],
2546
+ outputs=[key_moments]
2547
+ )
2548
+ key_moments_edit_button.click(
2549
+ enable_edit_mode,
2550
+ inputs=[],
2551
+ outputs=[key_moments]
2552
+ )
2553
+ key_moments_update_button.click(
2554
+ update_LLM_content,
2555
+ inputs=[video_id, key_moments, key_moments_kind],
2556
+ outputs=[key_moments]
2557
+ )
2558
+
2559
+ # question_list event
2560
+ questions_create_button.click(
2561
+ create_LLM_content,
2562
+ inputs=[video_id, df_string_output, questions_kind],
2563
+ outputs=[questions_json]
2564
+ )
2565
+ questions_delete_button.click(
2566
+ delete_LLM_content,
2567
+ inputs=[video_id, questions_kind],
2568
+ outputs=[questions_json]
2569
+ )
2570
+ questions_edit_button.click(
2571
+ enable_edit_mode,
2572
+ inputs=[],
2573
+ outputs=[questions_json]
2574
+ )
2575
+ questions_update_button.click(
2576
+ update_LLM_content,
2577
+ inputs=[video_id, questions_json, questions_kind],
2578
+ outputs=[questions_json]
2579
+ )
2580
+
2581
  # 教師版
2582
  worksheet_content_btn.click(
2583
  get_ai_content,
chatbot.py CHANGED
@@ -8,65 +8,46 @@ class Chatbot:
8
  self.content_subject = config.get('content_subject')
9
  self.content_grade = config.get('content_grade')
10
  self.jutor_chat_key = config.get('jutor_chat_key')
11
- self.transcript_text = self.get_transcript_text(config.get('trascript'))
 
12
  self.ai_name = config.get('ai_name')
13
  self.ai_client = config.get('ai_client')
 
14
 
15
  def get_transcript_text(self, transcript_data):
16
- transcript_json = json.loads(transcript_data)
 
 
 
17
  for entry in transcript_json:
18
- entry.pop('embed_url', None)
19
- entry.pop('screenshot_path', None)
20
  transcript_text = json.dumps(transcript_json, ensure_ascii=False)
21
  return transcript_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def chat(self, user_message, chat_history, socratic_mode=False, service_type='jutor'):
24
  messages = self.prepare_messages(chat_history, user_message)
25
- system_prompt = self.prepare_system_prompt(socratic_mode)
26
  if service_type in ['jutor', 'groq', 'claude3']:
27
  response_text = self.chat_with_service(service_type, system_prompt, messages)
28
  return response_text
29
  else:
30
  raise gr.Error("不支持此服務")
31
 
32
- def prepare_system_prompt(self, socratic_mode):
33
- content_subject = self.content_subject
34
- content_grade = self.content_grade
35
- video_id = self.video_id
36
- trascript_text = self.transcript_text
37
- socratic_mode = str(socratic_mode)
38
- ai_name = self.ai_name
39
- system_prompt = f"""
40
- 科目:{content_subject}
41
- 年級:{content_grade}
42
- 逐字稿資料:{trascript_text}
43
- -------------------------------------
44
- 你是一個專業的{content_subject}老師, user 為{content_grade}的學生
45
- socratic_mode = {socratic_mode}
46
- if socratic_mode is True,
47
- - 請用蘇格拉底式的提問方式,引導學生思考,並且給予學生一些提示
48
- - 一次只問一個問題,字數在100字以內
49
- - 不要直接給予答案,讓學生自己思考
50
- - 但可以給予一些提示跟引導,例如給予影片的時間軸,讓學生自己去找答案
51
-
52
- if socratic_mode is False,
53
- - 直接回答學生問題,字數在100字以內
54
-
55
- rule:
56
- - 請一定要用繁體中文回答 zh-TW,並用台灣人的口語表達,回答時不用特別說明這是台灣人的語氣,也不用說這是「台語的說法」
57
- - 不用提到「逐字稿」這個詞
58
- - 如果學生問了一些問題你無法判斷,請告訴學生你無法判斷,並建議學生可以問其他問題
59
- - 或者你可以反問學生一些問題,幫助學生更好的理解資料,字數在100字以內
60
- - 如果學生的問題與資料文本無關,請告訴學生你「無法回答超出影片範圍的問題」,並告訴他可以怎麼問什麼樣的問題(一個就好)
61
- - 只要是參考逐字稿資料,請在回答的最後標註【參考資料:(分):(秒)】
62
- - 回答範圍一定要在逐字稿資料內,不要引用其他資料,請嚴格執行
63
- - 並在重複問句後給予學生鼓勵,讓學生有學習的動力
64
- - 請用 {content_grade} 的學生能懂的方式回答
65
- - 回答時數學式請用數學符號代替文字(Latex 用 $ 字號 render)
66
- """
67
-
68
- return system_prompt
69
-
70
  def prepare_messages(self, chat_history, user_message):
71
  messages = []
72
  if chat_history is not None:
@@ -101,12 +82,14 @@ class Chatbot:
101
  "Content-Type": "application/json",
102
  "x-api-key": self.jutor_chat_key,
103
  }
 
 
104
  data = {
105
  "data": {
106
  "messages": messages,
107
  "max_tokens": 512,
108
  "temperature": 0.9,
109
- "model": "gpt-4-1106-preview",
110
  "stream": False,
111
  }
112
  }
 
8
  self.content_subject = config.get('content_subject')
9
  self.content_grade = config.get('content_grade')
10
  self.jutor_chat_key = config.get('jutor_chat_key')
11
+ self.transcript_text = self.get_transcript_text(config.get('transcript'))
12
+ self.key_moments_text = self.get_key_moments_text(config.get('key_moments'))
13
  self.ai_name = config.get('ai_name')
14
  self.ai_client = config.get('ai_client')
15
+ self.instructions = config.get('instructions')
16
 
17
  def get_transcript_text(self, transcript_data):
18
+ if isinstance(transcript_data, str):
19
+ transcript_json = json.loads(transcript_data)
20
+ else:
21
+ transcript_json = transcript_data
22
  for entry in transcript_json:
23
+ entry.pop('end_time', None)
 
24
  transcript_text = json.dumps(transcript_json, ensure_ascii=False)
25
  return transcript_text
26
+
27
+ def get_key_moments_text(self, key_moments_data):
28
+ if isinstance(key_moments_data, str):
29
+ key_moments_json = json.loads(key_moments_data)
30
+ else:
31
+ key_moments_json = key_moments_data
32
+ # key_moments_json remove images
33
+ for moment in key_moments_json:
34
+ moment.pop('images', None)
35
+ moment.pop('end', None)
36
+ moment.pop('transcript', None)
37
+
38
+ key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
39
+ return key_moments_text
40
+
41
 
42
  def chat(self, user_message, chat_history, socratic_mode=False, service_type='jutor'):
43
  messages = self.prepare_messages(chat_history, user_message)
44
+ system_prompt = self.instructions
45
  if service_type in ['jutor', 'groq', 'claude3']:
46
  response_text = self.chat_with_service(service_type, system_prompt, messages)
47
  return response_text
48
  else:
49
  raise gr.Error("不支持此服務")
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def prepare_messages(self, chat_history, user_message):
52
  messages = []
53
  if chat_history is not None:
 
82
  "Content-Type": "application/json",
83
  "x-api-key": self.jutor_chat_key,
84
  }
85
+ model = "gpt-4-1106-preview"
86
+ # model = "gpt-3.5-turbo-0125"
87
  data = {
88
  "data": {
89
  "messages": messages,
90
  "max_tokens": 512,
91
  "temperature": 0.9,
92
+ "model": model,
93
  "stream": False,
94
  }
95
  }