seiching committed on
Commit
dab3682
1 Parent(s): 40ae977

update gpt3write

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -23,9 +23,9 @@ from openai import OpenAI
23
  from concurrent.futures import ThreadPoolExecutor
24
  import tiktoken
25
 
26
- usemodelname='gpt-4-0125-preview'
27
 
28
- def call_openai_api(openaiobj,transcription,usemodelname):
 
29
 
30
  response = openaiobj.chat.completions.create(
31
  #model="gpt-3.5-turbo",
@@ -34,7 +34,7 @@ def call_openai_api(openaiobj,transcription,usemodelname):
34
  messages=[
35
  {
36
  "role": "system",
37
- "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,討論內容細節請略過,請列出經主席確認的會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
38
  },
39
  {
40
  "role": "user",
@@ -43,8 +43,8 @@ def call_openai_api(openaiobj,transcription,usemodelname):
43
  ]
44
  )
45
  return response.choices[0].message.content
46
- def call_openai_summary(openaiobj,transcription):
47
-
48
  response = openaiobj.chat.completions.create(
49
  #model="gpt-3.5-turbo",
50
  model=usemodelname,
@@ -52,7 +52,7 @@ def call_openai_summary(openaiobj,transcription):
52
  messages=[
53
  {
54
  "role": "system",
55
- "content": "你是專業的會議紀錄製作員,請根據分段的會議決證,彙整成正式會議紀錄"
56
  },
57
  {
58
  "role": "user",
@@ -70,7 +70,7 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
70
  messages=[
71
  {
72
  "role": "system",
73
- "content": "你是專業的會議紀錄製作員,請根據分段的會議決證,彙整成正式會議紀錄"
74
  },
75
  {
76
  "role": "user",
@@ -83,16 +83,17 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
83
 
84
 
85
 
86
- def split_into_chunks(text, tokens=15900):
87
  #encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
88
- encoding = tiktoken.encoding_for_model(usemodelname)
89
  words = encoding.encode(text)
90
  chunks = []
91
  for i in range(0, len(words), tokens):
92
  chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
93
  return chunks
94
 
95
- def process_chunks(openaikeystr,inputtext,LLMmodel):
 
96
  # openaiobj = OpenAI(
97
  # # This is the default and can be omitted
98
 
@@ -108,21 +109,21 @@ def process_chunks(openaikeystr,inputtext,LLMmodel):
108
  text = inputtext
109
  #openaikey.set_key(openaikeystr)
110
  #print('process_chunk',openaikey.get_key())
111
- chunks = split_into_chunks(text)
112
- response='這是分段會議紀錄結果\n\n'
113
  i=1
114
  if len(chunks)>1:
115
-
116
  for chunk in chunks:
117
 
118
- response=response+'第' +str(i)+'段\n'+call_openai_api(openaiobj,chunk,LLMmodel)+'\n\n'
119
  i=i+1
120
- finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +call_openai_api(openaiobj,response,LLMmodel)
121
  # response=response+call_openai_summary(openaiobj,chunk)
122
 
123
 
124
  else:
125
- finalresponse=call_openai_api(openaiobj,chunk[0],LLMmodel)
126
  return finalresponse
127
  # # Processes chunks in parallel
128
  # with ThreadPoolExecutor() as executor:
@@ -234,6 +235,8 @@ file_transcribe = gr.Interface(
234
  allow_flagging="never",
235
  )
236
  import google.generativeai as genai
 
 
237
  def gpt4write(openaikeystr,transcribe_text,LLMmodel):
238
  # openaiobj = OpenAI(
239
  # # This is the default and can be omitted
@@ -253,7 +256,7 @@ def gpt4write(openaikeystr,transcribe_text,LLMmodel):
253
  #chunks = split_into_chunks(text)
254
  #response='這是分段會議紀錄結果\n\n'
255
 
256
- finalresponse=call_openai_api(openaiobj,transcribe_text,LLMmodel)
257
  # response=response+call_openai_summary(openaiobj,chunk)
258
  return finalresponse
259
 
@@ -274,7 +277,7 @@ def writenotes( LLMmodel,apikeystr,inputscript):
274
  if len(inputscript)>10: #有資料表示不是來自語音辨識結果
275
  transcribe_text=inputscript
276
  if LLMmodel=="gpt-3.5-turbo":
277
- ainotestext=process_chunks(apikeystr,transcribe_text,LLMmodel)
278
  elif LLMmodel=="gpt-4-0125-preview":
279
  ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
280
  elif LLMmodel=='gemini':
 
23
  from concurrent.futures import ThreadPoolExecutor
24
  import tiktoken
25
 
 
26
 
27
+ def call_openai_makenote(openaiobj,transcription,usemodelname):
28
+ ## 直接做會議紀錄,GPT4或GPT 3.5但小於16K
29
 
30
  response = openaiobj.chat.completions.create(
31
  #model="gpt-3.5-turbo",
 
34
  messages=[
35
  {
36
  "role": "system",
37
+ "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請列出會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
38
  },
39
  {
40
  "role": "user",
 
43
  ]
44
  )
45
  return response.choices[0].message.content
46
+ def call_openai_summary(openaiobj,transcription,usemodelname):
47
+ ## 分段摘要
48
  response = openaiobj.chat.completions.create(
49
  #model="gpt-3.5-turbo",
50
  model=usemodelname,
 
52
  messages=[
53
  {
54
  "role": "system",
55
+ "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先校正,再摘要會議重點內容"
56
  },
57
  {
58
  "role": "user",
 
70
  messages=[
71
  {
72
  "role": "system",
73
+ "content": "你是專業的會議紀錄製作員,請根據分段的會議摘要,彙整成正式會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化"
74
  },
75
  {
76
  "role": "user",
 
83
 
84
 
85
 
86
+ def split_into_chunks(text,LLMmodel, tokens=15900):
87
  #encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
88
+ encoding = tiktoken.encoding_for_model(LLMmodel)
89
  words = encoding.encode(text)
90
  chunks = []
91
  for i in range(0, len(words), tokens):
92
  chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
93
  return chunks
94
 
95
+ def gpt3write(openaikeystr,inputtext,LLMmodel):
96
+
97
  # openaiobj = OpenAI(
98
  # # This is the default and can be omitted
99
 
 
109
  text = inputtext
110
  #openaikey.set_key(openaikeystr)
111
  #print('process_chunk',openaikey.get_key())
112
+ chunks = split_into_chunks(text,LLMmodel)
113
+
114
  i=1
115
  if len(chunks)>1:
116
+ response='這是分段會議紀錄摘要\n\n'
117
  for chunk in chunks:
118
 
119
+ response=response+'第' +str(i)+'段\n'+call_openai_summary(openaiobj,chunk,LLMmodel)+'\n\n'
120
  i=i+1
121
+ finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +call_openai_summaryall(openaiobj,response,LLMmodel)
122
  # response=response+call_openai_summary(openaiobj,chunk)
123
 
124
 
125
  else:
126
+ finalresponse=call_openai_makenote(openaiobj,inputtext,LLMmodel)
127
  return finalresponse
128
  # # Processes chunks in parallel
129
  # with ThreadPoolExecutor() as executor:
 
235
  allow_flagging="never",
236
  )
237
  import google.generativeai as genai
238
+
239
+
240
  def gpt4write(openaikeystr,transcribe_text,LLMmodel):
241
  # openaiobj = OpenAI(
242
  # # This is the default and can be omitted
 
256
  #chunks = split_into_chunks(text)
257
  #response='這是分段會議紀錄結果\n\n'
258
 
259
+ finalresponse=call_openai_makenote(openaiobj,transcribe_text,LLMmodel)
260
  # response=response+call_openai_summary(openaiobj,chunk)
261
  return finalresponse
262
 
 
277
  if len(inputscript)>10: #有資料表示不是來自語音辨識結果
278
  transcribe_text=inputscript
279
  if LLMmodel=="gpt-3.5-turbo":
280
+ ainotestext=gpt3write(apikeystr,transcribe_text,LLMmodel)
281
  elif LLMmodel=="gpt-4-0125-preview":
282
  ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
283
  elif LLMmodel=='gemini':