souljoy committed on
Commit
4dc14ee
1 Parent(s): c2579e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -9,13 +9,16 @@ import numpy as np
9
  import openai
10
  from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, Prompt
11
  from transformers import pipeline
 
12
 
 
13
  ocr = CnOcr() # 初始化ocr模型
14
  history_max_len = 500 # 机器人记忆的最大长度
15
  all_max_len = 2000 # 输入的最大长度
16
  asr_model_id = "openai/whisper-tiny" # 更新为你的模型ID
17
  asr_pipe = pipeline("automatic-speech-recognition", model=asr_model_id)
18
 
 
19
  def get_text_emb(open_ai_key, text): # 文本向量化
20
  openai.api_key = open_ai_key # 设置openai的key
21
  response = openai.Embedding.create(
@@ -196,7 +199,9 @@ def transcribe_speech(filepath):
196
  chunk_length_s=30,
197
  batch_size=8,
198
  )
199
- return output["text"]
 
 
200
 
201
 
202
  with gr.Blocks() as demo:
 
9
  import openai
10
  from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, Prompt
11
  from transformers import pipeline
12
+ import opencc
13
 
14
+ converter = opencc.OpenCC('t2s') # 创建一个OpenCC实例,指定繁体字转为简体字
15
  ocr = CnOcr() # 初始化ocr模型
16
  history_max_len = 500 # 机器人记忆的最大长度
17
  all_max_len = 2000 # 输入的最大长度
18
  asr_model_id = "openai/whisper-tiny" # 更新为你的模型ID
19
  asr_pipe = pipeline("automatic-speech-recognition", model=asr_model_id)
20
 
21
+
22
  def get_text_emb(open_ai_key, text): # 文本向量化
23
  openai.api_key = open_ai_key # 设置openai的key
24
  response = openai.Embedding.create(
 
199
  chunk_length_s=30,
200
  batch_size=8,
201
  )
202
+ # 转换为简体字
203
+ simplified_text = converter.convert(output["text"])
204
+ return simplified_text
205
 
206
 
207
  with gr.Blocks() as demo: