seiching committed on
Commit
dab3682
1 Parent(s): 40ae977

update gpt3write

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -23,9 +23,9 @@ from openai import OpenAI
23
  from concurrent.futures import ThreadPoolExecutor
24
  import tiktoken
25
 
26
- usemodelname='gpt-4-0125-preview'
27
 
28
- def call_openai_api(openaiobj,transcription,usemodelname):
 
29
 
30
  response = openaiobj.chat.completions.create(
31
  #model="gpt-3.5-turbo",
@@ -34,7 +34,7 @@ def call_openai_api(openaiobj,transcription,usemodelname):
34
  messages=[
35
  {
36
  "role": "system",
37
- "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,討論內容細節請略過,請列出經主席確認的會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
38
  },
39
  {
40
  "role": "user",
@@ -43,8 +43,8 @@ def call_openai_api(openaiobj,transcription,usemodelname):
43
  ]
44
  )
45
  return response.choices[0].message.content
46
- def call_openai_summary(openaiobj,transcription):
47
-
48
  response = openaiobj.chat.completions.create(
49
  #model="gpt-3.5-turbo",
50
  model=usemodelname,
@@ -52,7 +52,7 @@ def call_openai_summary(openaiobj,transcription):
52
  messages=[
53
  {
54
  "role": "system",
55
- "content": "你是專業的會議紀錄製作員,請根據分段的會議決證,彙整成正式會議紀錄"
56
  },
57
  {
58
  "role": "user",
@@ -70,7 +70,7 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
70
  messages=[
71
  {
72
  "role": "system",
73
- "content": "你是專業的會議紀錄製作員,請根據分段的會議決證,彙整成正式會議紀錄"
74
  },
75
  {
76
  "role": "user",
@@ -83,16 +83,17 @@ def call_openai_summaryall(openaiobj,transcription,usemodelname):
83
 
84
 
85
 
86
- def split_into_chunks(text, tokens=15900):
87
  #encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
88
- encoding = tiktoken.encoding_for_model(usemodelname)
89
  words = encoding.encode(text)
90
  chunks = []
91
  for i in range(0, len(words), tokens):
92
  chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
93
  return chunks
94
 
95
- def process_chunks(openaikeystr,inputtext,LLMmodel):
 
96
  # openaiobj = OpenAI(
97
  # # This is the default and can be omitted
98
 
@@ -108,21 +109,21 @@ def process_chunks(openaikeystr,inputtext,LLMmodel):
108
  text = inputtext
109
  #openaikey.set_key(openaikeystr)
110
  #print('process_chunk',openaikey.get_key())
111
- chunks = split_into_chunks(text)
112
- response='這是分段會議紀錄結果\n\n'
113
  i=1
114
  if len(chunks)>1:
115
-
116
  for chunk in chunks:
117
 
118
- response=response+'第' +str(i)+'段\n'+call_openai_api(openaiobj,chunk,LLMmodel)+'\n\n'
119
  i=i+1
120
- finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +call_openai_api(openaiobj,response,LLMmodel)
121
  # response=response+call_openai_summary(openaiobj,chunk)
122
 
123
 
124
  else:
125
- finalresponse=call_openai_api(openaiobj,chunk[0],LLMmodel)
126
  return finalresponse
127
  # # Processes chunks in parallel
128
  # with ThreadPoolExecutor() as executor:
@@ -234,6 +235,8 @@ file_transcribe = gr.Interface(
234
  allow_flagging="never",
235
  )
236
  import google.generativeai as genai
 
 
237
  def gpt4write(openaikeystr,transcribe_text,LLMmodel):
238
  # openaiobj = OpenAI(
239
  # # This is the default and can be omitted
@@ -253,7 +256,7 @@ def gpt4write(openaikeystr,transcribe_text,LLMmodel):
253
  #chunks = split_into_chunks(text)
254
  #response='這是分段會議紀錄結果\n\n'
255
 
256
- finalresponse=call_openai_api(openaiobj,transcribe_text,LLMmodel)
257
  # response=response+call_openai_summary(openaiobj,chunk)
258
  return finalresponse
259
 
@@ -274,7 +277,7 @@ def writenotes( LLMmodel,apikeystr,inputscript):
274
  if len(inputscript)>10: #有資料表示不是來自語音辨識結果
275
  transcribe_text=inputscript
276
  if LLMmodel=="gpt-3.5-turbo":
277
- ainotestext=process_chunks(apikeystr,transcribe_text,LLMmodel)
278
  elif LLMmodel=="gpt-4-0125-preview":
279
  ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
280
  elif LLMmodel=='gemini':
 
23
  from concurrent.futures import ThreadPoolExecutor
24
  import tiktoken
25
 
 
26
 
27
+ def call_openai_makenote(openaiobj,transcription,usemodelname):
28
+ ## 直接做會議紀錄,GPT4或GPT 3.5但小於16K
29
 
30
  response = openaiobj.chat.completions.create(
31
  #model="gpt-3.5-turbo",
 
34
  messages=[
35
  {
36
  "role": "system",
37
+ "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請列出會議決議,並要用比較正式及容易閱讀的寫法,避免口語化"
38
  },
39
  {
40
  "role": "user",
 
43
  ]
44
  )
45
  return response.choices[0].message.content
46
+ def call_openai_summary(openaiobj,transcription,usemodelname):
47
+ ## 分段摘要
48
  response = openaiobj.chat.completions.create(
49
  #model="gpt-3.5-turbo",
50
  model=usemodelname,
 
52
  messages=[
53
  {
54
  "role": "system",
55
+ "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先校正,再摘要會議重點內容"
56
  },
57
  {
58
  "role": "user",
 
70
  messages=[
71
  {
72
  "role": "system",
73
+ "content": "你是專業的會議紀錄製作員,請根據分段的會議摘要,彙整成正式會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化"
74
  },
75
  {
76
  "role": "user",
 
83
 
84
 
85
 
86
+ def split_into_chunks(text,LLMmodel, tokens=15900):
87
  #encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')
88
+ encoding = tiktoken.encoding_for_model(LLMmodel)
89
  words = encoding.encode(text)
90
  chunks = []
91
  for i in range(0, len(words), tokens):
92
  chunks.append(' '.join(encoding.decode(words[i:i + tokens])))
93
  return chunks
94
 
95
+ def gpt3write(openaikeystr,inputtext,LLMmodel):
96
+
97
  # openaiobj = OpenAI(
98
  # # This is the default and can be omitted
99
 
 
109
  text = inputtext
110
  #openaikey.set_key(openaikeystr)
111
  #print('process_chunk',openaikey.get_key())
112
+ chunks = split_into_chunks(text,LLMmodel)
113
+
114
  i=1
115
  if len(chunks)>1:
116
+ response='這是分段會議紀錄摘要\n\n'
117
  for chunk in chunks:
118
 
119
+ response=response+'第' +str(i)+'段\n'+call_openai_summary(openaiobj,chunk,LLMmodel)+'\n\n'
120
  i=i+1
121
+ finalresponse=response+'\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' +call_openai_summaryall(openaiobj,response,LLMmodel)
122
  # response=response+call_openai_summary(openaiobj,chunk)
123
 
124
 
125
  else:
126
+ finalresponse=call_openai_makenote(openaiobj,inputtext,LLMmodel)
127
  return finalresponse
128
  # # Processes chunks in parallel
129
  # with ThreadPoolExecutor() as executor:
 
235
  allow_flagging="never",
236
  )
237
  import google.generativeai as genai
238
+
239
+
240
  def gpt4write(openaikeystr,transcribe_text,LLMmodel):
241
  # openaiobj = OpenAI(
242
  # # This is the default and can be omitted
 
256
  #chunks = split_into_chunks(text)
257
  #response='這是分段會議紀錄結果\n\n'
258
 
259
+ finalresponse=call_openai_makenote(openaiobj,transcribe_text,LLMmodel)
260
  # response=response+call_openai_summary(openaiobj,chunk)
261
  return finalresponse
262
 
 
277
  if len(inputscript)>10: #有資料表示不是來自語音辨識結果
278
  transcribe_text=inputscript
279
  if LLMmodel=="gpt-3.5-turbo":
280
+ ainotestext=gpt3write(apikeystr,transcribe_text,LLMmodel)
281
  elif LLMmodel=="gpt-4-0125-preview":
282
  ainotestext=gpt4write(apikeystr,transcribe_text,LLMmodel)
283
  elif LLMmodel=='gemini':