Spaces:
Sleeping
Sleeping
update gpt3write
Browse files
app.py
CHANGED
@@ -23,9 +23,9 @@ from openai import OpenAI
|
|
23 |
from concurrent.futures import ThreadPoolExecutor
|
24 |
import tiktoken
|
25 |
|
26 |
-
usemodelname='gpt-4-0125-preview'
|
27 |
|
28 |
-
def
|
|
|
29 |
|
30 |
response = openaiobj.chat.completions.create(
|
31 |
#model="gpt-3.5-turbo",
|
@@ -34,7 +34,7 @@ def call_openai_api(openaiobj,transcription,usemodelname):
|
|
34 |
messages=[
|
35 |
{
|
36 |
"role": "system",
|
37 |
-
"content": "
|
38 |
},
|
39 |
{
|
40 |
"role": "user",
|
@@ -43,8 +43,8 @@ def call_openai_api(openaiobj,transcription,usemodelname):
|
|
43 |
]
|
44 |
)
|
45 |
return response.choices[0].message.content
|
46 |
-
def call_openai_summary(openaiobj,transcription):
|
47 |
-
|
48 |
response = openaiobj.chat.completions.create(
|
49 |
#model="gpt-3.5-turbo",
|
50 |
model=usemodelname,
|
@@ -52,7 +52,7 @@ def call_openai_summary(openaiobj,transcription):
|
|
52 |
messages=[
|
53 |
{
|
54 |
"role": "system",
|
55 |
-
"content": "
|
56 |
},
|
57 |
{
|
58 |
"role": "user",
|
@@ -70,7 +70,7 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
|
|
70 |
messages=[
|
71 |
{
|
72 |
"role": "system",
|
73 |
-
"content": "
|
74 |
},
|
75 |
{
|
76 |
"role": "user",
|
@@ -83,16 +83,17 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
|
|
83 |
|
84 |
|
85 |
|
86 |
-
def split_into_chunks(text, tokens=15900):
|
87 |
#encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
|
88 |
-
encoding = tiktoken.encoding_for_model(
|
89 |
words = encoding.encode(text)
|
90 |
chunks = []
|
91 |
for i in range(0, len(words), tokens):
|
92 |
chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
|
93 |
return chunks
|
94 |
|
95 |
-
def
|
|
|
96 |
# openaiobj = OpenAI(
|
97 |
# # This is the default and can be omitted
|
98 |
|
@@ -108,21 +109,21 @@ def process_chunks(openaikeystr,inputtext,LLMmodel):
|
|
108 |
text = inputtext
|
109 |
#openaikey.set_key(openaikeystr)
|
110 |
#print('process_chunk',openaikey.get_key())
|
111 |
-
chunks = split_into_chunks(text)
|
112 |
-
|
113 |
i=1
|
114 |
if len(chunks)>1:
|
115 |
-
|
116 |
for chunk in chunks:
|
117 |
|
118 |
-
response=response+'第' +str(i)+'段\n'+
|
119 |
i=i+1
|
120 |
-
finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +
|
121 |
# response=response+call_openai_summary(openaiobj,chunk)
|
122 |
|
123 |
|
124 |
else:
|
125 |
-
finalresponse=
|
126 |
return finalresponse
|
127 |
# # Processes chunks in parallel
|
128 |
# with ThreadPoolExecutor() as executor:
|
@@ -234,6 +235,8 @@ file_transcribe = gr.Interface(
|
|
234 |
allow_flagging="never",
|
235 |
)
|
236 |
import google.generativeai as genai
|
|
|
|
|
237 |
def gpt4write(openaikeystr,transcribe_text,LLMmodel):
|
238 |
# openaiobj = OpenAI(
|
239 |
# # This is the default and can be omitted
|
@@ -253,7 +256,7 @@ def gpt4write(openaikeystr,transcribe_text,LLMmodel):
|
|
253 |
#chunks = split_into_chunks(text)
|
254 |
#response='這是分段會議紀錄結果\n\n'
|
255 |
|
256 |
-
finalresponse=
|
257 |
# response=response+call_openai_summary(openaiobj,chunk)
|
258 |
return finalresponse
|
259 |
|
@@ -274,7 +277,7 @@ def writenotes( LLMmodel,apikeystr,inputscript):
|
|
274 |
if len(inputscript)>10: #有資料表示不是來自語音辨識結果
|
275 |
transcribe_text=inputscript
|
276 |
if LLMmodel=="gpt-3.5-turbo":
|
277 |
-
ainotestext=
|
278 |
elif LLMmodel=="gpt-4-0125-preview":
|
279 |
ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
|
280 |
elif LLMmodel=='gemini':
|
|
|
23 |
from concurrent.futures import ThreadPoolExecutor
|
24 |
import tiktoken
|
25 |
|
|
|
26 |
|
27 |
+
def call_openai_makenote(openaiobj,transcription,usemodelname):
|
28 |
+
## 直接做會議紀錄,GPT4或GPT 3.5但小於16K
|
29 |
|
30 |
response = openaiobj.chat.completions.create(
|
31 |
#model="gpt-3.5-turbo",
|
|
|
34 |
messages=[
|
35 |
{
|
36 |
"role": "system",
|
37 |
+
"content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請列出會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
|
38 |
},
|
39 |
{
|
40 |
"role": "user",
|
|
|
43 |
]
|
44 |
)
|
45 |
return response.choices[0].message.content
|
46 |
+
def call_openai_summary(openaiobj,transcription,usemodelname):
|
47 |
+
## 分段摘要
|
48 |
response = openaiobj.chat.completions.create(
|
49 |
#model="gpt-3.5-turbo",
|
50 |
model=usemodelname,
|
|
|
52 |
messages=[
|
53 |
{
|
54 |
"role": "system",
|
55 |
+
"content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先校正,再摘要會議重點內容"
|
56 |
},
|
57 |
{
|
58 |
"role": "user",
|
|
|
70 |
messages=[
|
71 |
{
|
72 |
"role": "system",
|
73 |
+
"content": "你是專業的會議紀錄製作員,請根據分段的會議摘要,彙整成正式會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化"
|
74 |
},
|
75 |
{
|
76 |
"role": "user",
|
|
|
83 |
|
84 |
|
85 |
|
86 |
+
def split_into_chunks(text,LLMmodel, tokens=15900):
    """Split *text* into token-bounded chunks for the given OpenAI model.

    Parameters
    ----------
    text : str
        The (possibly very long) transcript to split.
    LLMmodel : str
        Model name handed to ``tiktoken.encoding_for_model`` so the chunk
        boundaries are measured with the tokenizer the model actually uses.
    tokens : int, optional
        Maximum tokens per chunk.  Default 15900 — just under a 16K
        context window, leaving headroom for the prompt/response.

    Returns
    -------
    list[str]
        The decoded text chunks, in original order.
    """
    #encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
    encoding = tiktoken.encoding_for_model(LLMmodel)
    words = encoding.encode(text)
    chunks = []
    for i in range(0, len(words), tokens):
        # BUG FIX: Encoding.decode() already returns a str.  The previous
        # ' '.join(encoding.decode(...)) iterated that string character by
        # character and inserted a space between every single character of
        # the decoded chunk, corrupting the transcript (especially CJK text).
        chunks.append(encoding.decode(words[i:i + tokens]))
    return chunks
|
94 |
|
95 |
+
def gpt3write(openaikeystr,inputtext,LLMmodel):
|
96 |
+
|
97 |
# openaiobj = OpenAI(
|
98 |
# # This is the default and can be omitted
|
99 |
|
|
|
109 |
text = inputtext
|
110 |
#openaikey.set_key(openaikeystr)
|
111 |
#print('process_chunk',openaikey.get_key())
|
112 |
+
chunks = split_into_chunks(text,LLMmodel)
|
113 |
+
|
114 |
i=1
|
115 |
if len(chunks)>1:
|
116 |
+
response='這是分段會議紀錄摘要\n\n'
|
117 |
for chunk in chunks:
|
118 |
|
119 |
+
response=response+'第' +str(i)+'段\n'+call_openai_summary(openaiobj,chunk,LLMmodel)+'\n\n'
|
120 |
i=i+1
|
121 |
+
finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +call_openai_summaryall(openaiobj,response,LLMmodel)
|
122 |
# response=response+call_openai_summary(openaiobj,chunk)
|
123 |
|
124 |
|
125 |
else:
|
126 |
+
finalresponse=call_openai_makenote(openaiobj,inputtext,LLMmodel)
|
127 |
return finalresponse
|
128 |
# # Processes chunks in parallel
|
129 |
# with ThreadPoolExecutor() as executor:
|
|
|
235 |
allow_flagging="never",
|
236 |
)
|
237 |
import google.generativeai as genai
|
238 |
+
|
239 |
+
|
240 |
def gpt4write(openaikeystr,transcribe_text,LLMmodel):
|
241 |
# openaiobj = OpenAI(
|
242 |
# # This is the default and can be omitted
|
|
|
256 |
#chunks = split_into_chunks(text)
|
257 |
#response='這是分段會議紀錄結果\n\n'
|
258 |
|
259 |
+
finalresponse=call_openai_makenote(openaiobj,transcribe_text,LLMmodel)
|
260 |
# response=response+call_openai_summary(openaiobj,chunk)
|
261 |
return finalresponse
|
262 |
|
|
|
277 |
if len(inputscript)>10: #有資料表示不是來自語音辨識結果
|
278 |
transcribe_text=inputscript
|
279 |
if LLMmodel=="gpt-3.5-turbo":
|
280 |
+
ainotestext=gpt3write(apikeystr,transcribe_text,LLMmodel)
|
281 |
elif LLMmodel=="gpt-4-0125-preview":
|
282 |
ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
|
283 |
elif LLMmodel=='gemini':
|