youngtsai committed on
Commit
dc5db85
1 Parent(s): e5a1f9f

def preprocess_text(text):

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -393,6 +393,18 @@ def process_web_link(link):
393
  return soup.get_text()
394
 
395
 
 
 
 
 
 
 
 
 
 
 
 
 
396
 
397
  # get video_id_summary.json content
398
  def get_video_id_summary(video_id, df_string):
@@ -406,7 +418,9 @@ def get_video_id_summary(video_id, df_string):
406
  exists, file_id = check_file_exists(service, folder_id, file_name)
407
  if not exists:
408
  summary = generate_summarise(df_string)
409
- summary_json = {"summary": summary}
 
 
410
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
411
  file_id = upload_content_directly(service, file_name, folder_id, summary_text)
412
  print("summary已上传到Google Drive")
 
393
  return soup.get_text()
394
 
395
 
396
def preprocess_text(text):
    """Escape *text* so it can sit inside a JSON string literal.

    The result is the body of a JSON string (without the surrounding
    double quotes): wrapping it in ``"`` and parsing it as JSON gives
    back the original text.

    Args:
        text: The string to escape.

    Returns:
        A new string in which the backslash, the double quote and every
        control character below U+0020 are replaced by their JSON escape
        sequences. All other characters, including the single quote and
        non-ASCII text, are returned unchanged.
    """
    # Start with the generic \u00XX form for every control character;
    # JSON requires all code points below U+0020 to be escaped.
    escapes = {code: "\\u{:04x}".format(code) for code in range(0x20)}

    # Override with the short escapes JSON defines. The backslash itself
    # must be escaped, otherwise a literal backslash followed by "n"
    # would be indistinguishable from an escaped newline.
    # Note: JSON has no \' or \v escape, so the single quote is left
    # alone and vertical tab keeps its \u000b form.
    short_escapes = {
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        "\b": "\\b",
        "\f": "\\f",
    }
    for char, escaped in short_escapes.items():
        escapes[ord(char)] = escaped

    # A single translate pass replaces each source character exactly
    # once, so an escape sequence is never re-escaped by a later rule.
    return text.translate(escapes)
407
+
408
 
409
  # get video_id_summary.json content
410
  def get_video_id_summary(video_id, df_string):
 
418
  exists, file_id = check_file_exists(service, folder_id, file_name)
419
  if not exists:
420
  summary = generate_summarise(df_string)
421
+ # preprocess_text to json value acceptable
422
+ processed_summary = preprocess_text(summary)
423
+ summary_json = {"summary": processed_summary}
424
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
425
  file_id = upload_content_directly(service, file_name, folder_id, summary_text)
426
  print("summary已上传到Google Drive")