File size: 11,970 Bytes
0ca719f
 
 
 
b7ba5fb
0ca719f
b7ba5fb
48f3d89
bd2f0d0
48f3d89
d182662
9c0b98f
0ca719f
4082345
8ee0b64
0ca719f
 
 
 
 
 
 
 
 
b051926
 
 
 
 
dab3682
 
b051926
 
bc1215e
 
b051926
 
 
 
5e977c0
b051926
 
 
 
 
 
 
 
dab3682
 
5e5f20d
bc1215e
 
5e5f20d
 
 
 
dab3682
5e5f20d
 
 
 
 
 
 
 
40ae977
083e8d5
 
 
 
 
 
 
 
dab3682
083e8d5
 
 
 
 
 
 
 
 
5e5f20d
b051926
 
dab3682
bc1215e
dab3682
b051926
 
 
 
 
 
dab3682
 
52264b1
 
b051926
52264b1
 
44d039c
b7ba5fb
 
 
 
6091c72
b7ba5fb
b051926
 
 
dab3682
 
7e749fe
083e8d5
dab3682
083e8d5
7222f55
dab3682
083e8d5
dab3682
083e8d5
 
 
bd2f0d0
dab3682
494d547
b051926
 
 
 
 
 
 
 
 
3ef2f86
 
 
b051926
 
 
 
 
 
 
 
 
 
0ca719f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548d77e
 
0ca719f
 
 
 
 
 
 
 
b051926
 
 
 
 
 
 
 
 
0ca719f
 
 
 
 
 
 
 
 
548d77e
0ca719f
 
 
 
 
b051926
0ca719f
b051926
2d533cf
b051926
0ca719f
 
 
 
 
 
 
548d77e
0ca719f
 
 
 
 
b051926
0ca719f
b051926
2d533cf
b051926
0ca719f
b051926
 
 
 
 
 
 
bd2f0d0
dab3682
 
40ae977
e818920
 
 
 
 
 
 
 
 
 
 
 
40ae977
e818920
 
 
 
 
dab3682
e818920
 
 
a5339f4
4082345
a5339f4
bd2f0d0
 
4082345
 
bd2f0d0
 
4082345
b051926
 
 
48f3d89
 
548d77e
b051926
bd2f0d0
 
 
dab3682
bd2f0d0
40ae977
bd2f0d0
4082345
bd2f0d0
 
 
 
6091c72
b051926
 
 
 
 
 
 
 
 
 
48f3d89
b051926
 
 
 
 
494d547
2d533cf
b051926
 
 
 
 
 
0ca719f
 
 
 
 
b051926
0ca719f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr
import os

# OpenAI API key taken from the 'openaikey' environment variable (KeyError at
# import time if it is unset). gpt3write()/gpt4write() treat the literal value
# 'test' as "no server-side key" and fall back to the key typed into the UI.
hugapikey=os.environ['openaikey']
#hugapikey='test'
# Google Generative AI key taken from the 'genaikey' environment variable;
# used by gewritenote().
genaikey=os.environ['genaikey']
#genaikey='test'
# Alternative checkpoint kept for reference:
#MODEL_NAME = "seiching/whisper-small-seiching"
# Speech-recognition checkpoint handed to the transformers pipeline below.
MODEL_NAME = "openai/whisper-tiny"
# Batch size passed to the pipeline on every transcribe() call.
BATCH_SIZE = 8
# Fallback prompt (Chinese) used by writenotes() when the user-supplied prompt
# is shorter than 10 characters. It asks the model to act as a professional
# minute-taker: correct errors in the speech-recognition transcript, skip
# discussion details, and write formal, easy-to-read minutes.
DEFAULTPROMPT='你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請根據校正過的逐字稿撰寫會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化'
#
# Device argument for the pipeline: index 0 when CUDA is available, else "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Automatic-speech-recognition pipeline, built once at import time; audio is
# processed in 30-second chunks (chunk_length_s=30).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor
import tiktoken


def call_openai_makenote(openaiobj,transcription,usemodelname):
    """Turn a whole transcript into meeting minutes with one chat completion.

    Intended for inputs that fit in a single request (GPT-4, or GPT-3.5 when
    the transcript is under 16K tokens).

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Transcript text, sent as the user message.
        usemodelname: Model identifier forwarded as ``model``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # System prompt (Chinese): act as a professional minute-taker, correct
    # transcription errors first, skip discussion details, write formally.
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先做校正,討論內容細節請略過,請根據校正過的逐字稿撰寫會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化",
    }
    user_message = {"role": "user", "content": transcription}

    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def call_openai_summary(openaiobj,transcription,usemodelname):
    """Summarise one transcript segment (the per-chunk summary step).

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: Text of a single transcript segment, sent as the user
            message.
        usemodelname: Model identifier forwarded as ``model``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # System prompt (Chinese): act as a professional minute-taker, correct
    # transcription errors first, then summarise the key points.
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據由語音辨識軟體將會議錄音所轉錄的逐字稿,也請注意逐字稿可能有錯,請先校正,再摘要會議重點內容",
    }
    user_message = {"role": "user", "content": transcription}

    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def call_openai_summaryall(openaiobj,transcription,usemodelname):
    """Merge per-segment summaries into one set of formal meeting minutes.

    Args:
        openaiobj: Client object exposing ``chat.completions.create``.
        transcription: The concatenated segment summaries, sent as the user
            message.
        usemodelname: Model identifier forwarded as ``model``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # System prompt (Chinese): compile the segmented summaries into formal,
    # easy-to-read minutes and avoid colloquial wording.
    system_message = {
        "role": "system",
        "content": "你是專業的會議紀錄製作員,請根據分段的會議摘要,彙整成正式會議紀錄,並要用比較正式及容易閱讀的寫法,避免口語化",
    }
    user_message = {"role": "user", "content": transcription}

    completion = openaiobj.chat.completions.create(
        model=usemodelname,
        temperature=0,
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
    



def split_into_chunks(text,LLMmodel, tokens=15900):
    """Split *text* into consecutive pieces of at most *tokens* model tokens.

    The text is tokenised with the tiktoken encoding registered for
    *LLMmodel*, the token list is cut into slices of *tokens* ids, and each
    slice is decoded back to a string.

    Args:
        text: The text to split.
        LLMmodel: Model name passed to ``tiktoken.encoding_for_model``.
        tokens: Maximum number of tokens per chunk; must be positive.

    Returns:
        A list of chunk strings in original order (empty when *text* encodes
        to no tokens).

    Raises:
        ValueError: If *tokens* is not positive.
    """
    if tokens <= 0:
        raise ValueError("tokens must be a positive integer")

    encoding = tiktoken.encoding_for_model(LLMmodel)
    token_ids = encoding.encode(text)
    # decode() already returns the chunk as one string. The previous
    # ' '.join(...) around it iterated that string character by character and
    # put a space between every character, corrupting the text and inflating
    # its token count well past the intended chunk size.
    # NOTE(review): a slice boundary may fall inside a character that spans
    # several tokens; presumably decode() then yields a replacement character
    # at the edge — confirm against the tiktoken docs if that matters.
    return [
        encoding.decode(token_ids[start:start + tokens])
        for start in range(0, len(token_ids), tokens)
    ]

def gpt3write(openaikeystr,inputtext,LLMmodel):
    """Write meeting minutes with an OpenAI chat model, chunking long input.

    The transcript is split with ``split_into_chunks``. A transcript that
    yields at most one chunk is sent straight to ``call_openai_makenote``.
    Otherwise every chunk is summarised with ``call_openai_summary`` and the
    numbered summaries are then consolidated with ``call_openai_summaryall``.

    Args:
        openaikeystr: API key supplied by the caller; only used when the
            module-level ``hugapikey`` is the placeholder ``'test'``.
        inputtext: The transcript text.
        LLMmodel: Model name used for both tokenisation and completion.

    Returns:
        The minutes text. For multi-chunk input this is the numbered segment
        summaries followed by the consolidated minutes.
    """
    # The environment-provided key wins unless it is the 'test' placeholder.
    api_key = openaikeystr if hugapikey == 'test' else hugapikey
    client = OpenAI(api_key=api_key)

    pieces = split_into_chunks(inputtext, LLMmodel)
    if len(pieces) <= 1:
        return call_openai_makenote(client, inputtext, LLMmodel)

    # Header (Chinese): "these are the segmented meeting summaries".
    sections = ['這是分段會議紀錄摘要\n\n']
    for number, piece in enumerate(pieces, start=1):
        segment_summary = call_openai_summary(client, piece, LLMmodel)
        sections.append('第' + str(number) + '段\n' + segment_summary + '\n\n')
    digest = ''.join(sections)

    # The consolidated minutes are appended after the per-segment summaries.
    consolidated = call_openai_summaryall(client, digest, LLMmodel)
    return digest + '\n\n 這是根據以上分段會議紀錄彙編如下 \n\n' + consolidated
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr



# Latest transcript text: transcribe() stores its result here and writenotes()
# reads it (or overwrites it with a pasted transcript).
# NOTE(review): this is process-wide state, so concurrent users of the app
# would presumably share one transcript — confirm that is acceptable.
transcribe_text=""

# `device` and `pipe` are intentionally not rebuilt here: they are already
# created earlier in this file from the same MODEL_NAME with identical
# arguments, and constructing the pipeline a second time only reloaded the
# model at import.


# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    """Render a time offset in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: Offset in seconds, or ``None`` for a malformed timestamp.
        always_include_hours: Emit the ``HH:`` prefix even when it is zero.
        decimal_marker: Separator placed between seconds and milliseconds.

    Returns:
        The formatted string, or ``None`` unchanged when *seconds* is ``None``.
    """
    if seconds is None:
        # Malformed timestamp: hand it back as is.
        return seconds

    remaining_ms = round(seconds * 1000.0)
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    # The hour field is shown only on request or when it is non-zero.
    prefix = f"{hours:02d}:" if (always_include_hours or hours > 0) else ""
    return f"{prefix}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"


def transcribe(file,  return_timestamps):
    """Transcribe an audio file with the module-level ASR pipeline.

    The pipeline is called with task "transcribe" and language "chinese".
    The resulting text is also stored in the module-level ``transcribe_text``
    so that ``writenotes`` can pick it up later.

    Args:
        file: Audio input forwarded to ``pipe`` (the UI passes a file path).
        return_timestamps: When truthy, return one line per chunk in the form
            ``[start -> end] text`` instead of the plain transcript.

    Returns:
        The transcript text (plain, or timestamped lines joined by newlines).
    """
    global transcribe_text

    result = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe", "language": "chinese"},
        return_timestamps=return_timestamps,
    )
    transcript = result["text"]

    if return_timestamps:
        stamped_lines = []
        for segment in result["chunks"]:
            begin = format_timestamp(segment['timestamp'][0])
            end = format_timestamp(segment['timestamp'][1])
            stamped_lines.append(f"[{begin} -> {end}] {segment['text']}")
        transcript = "\n".join(stamped_lines)

    # Remember the latest transcript for the minutes-generation tab.
    transcribe_text = transcript
    return transcript


# Top-level Blocks container; the tabbed UI is attached to it at the end of
# the file.
# NOTE(review): gr.inputs.* and the layout/theme/optional keywords used below
# look like an older Gradio API — confirm against the pinned Gradio version.
demo = gr.Blocks()

# Tab: record from the microphone and transcribe via transcribe().
# Inputs map positionally to transcribe(file, return_timestamps).
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
       # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        gr.inputs.Checkbox(default=False, label="Return timestamps"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    # Description (Chinese): audio may be recorded or uploaded; names the
    # model in use and points to a whisper-jax Space as a faster alternative.
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME})如果覺得速度有點慢, 可以用(https://huggingface.co/spaces/sanchit-gandhi/whisper-jax) 先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    allow_flagging="never",
)
# Tab: upload an audio file and transcribe via transcribe().
# Inputs map positionally to transcribe(file, return_timestamps).
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
       # gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
        gr.inputs.Checkbox(default=False, label="Return timestamps"),
    ],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    # Description (Chinese): same guidance text as the microphone tab.
    description=(
        "可由麥克風錄音或上傳語音檔"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 如果覺得速度有點慢, 可以用(https://huggingface.co/spaces/sanchit-gandhi/whisper-jax),先做語音辨識再做會議紀錄摘要"
        " 長度沒有限制"
    ),
    # examples=[
    #     ["./example.flac", "transcribe", False],
    #     ["./example.flac", "transcribe", True],
    # ],
    # NOTE(review): cache_examples is enabled although the examples list above
    # is commented out — confirm whether this flag is still wanted.
    cache_examples=True,
    allow_flagging="never",
)
import google.generativeai as genai


def gpt4write(openaikeystr,transcribe_text,LLMmodel):
    """Write meeting minutes from the whole transcript in a single request.

    Unlike ``gpt3write`` the transcript is not chunked; it is passed as is to
    ``call_openai_makenote``.

    Args:
        openaikeystr: API key supplied by the caller; only used when the
            module-level ``hugapikey`` is the placeholder ``'test'``.
        transcribe_text: The transcript text (this parameter shadows the
            module-level variable of the same name inside the function).
        LLMmodel: Model name forwarded to the chat-completion call.

    Returns:
        The minutes text returned by ``call_openai_makenote``.
    """
    # The environment-provided key wins unless it is the 'test' placeholder.
    realkey = openaikeystr if hugapikey == 'test' else hugapikey
    openaiobj = OpenAI(api_key=realkey)
    # The former trailing `return 'ok'` could never execute and was removed.
    return call_openai_makenote(openaiobj, transcribe_text, LLMmodel)
def gewritenote(prompt,inputscript):
    """Generate meeting minutes with the Gemini ``gemini-pro`` model.

    Args:
        prompt: Instruction text placed at the start of the request.
        inputscript: Transcript text, wrapped in ``#`` delimiters after the
            prompt.

    Returns:
        The ``text`` attribute of the generation response.
    """
    genai.configure(api_key=genaikey)
    gemini = genai.GenerativeModel('gemini-pro')
    # Request layout: <prompt>#<transcript>#
    request_text = ''.join((prompt, '#', inputscript, '#'))
    return gemini.generate_content(request_text).text
def writenotes( LLMmodel,apikeystr,prompt,inputscript):
    """Produce meeting minutes with the selected LLM backend.

    The transcript comes from *inputscript* when it is longer than 10
    characters; otherwise the module-level ``transcribe_text`` left by the
    last ``transcribe`` call is used. A supplied transcript also replaces
    ``transcribe_text``.

    Args:
        LLMmodel: One of ``"gpt-3.5-turbo"``, ``"gpt-4-0125-preview"`` or
            ``"gemini"``.
        apikeystr: OpenAI API key typed by the user; forwarded to the GPT
            writers.
        prompt: Custom prompt. Replaced by ``DEFAULTPROMPT`` when shorter than
            10 characters. Only the Gemini path receives it; the GPT writers
            are not passed a prompt.
        inputscript: Optional pasted transcript.

    Returns:
        The generated minutes text.

    Raises:
        ValueError: If *LLMmodel* is not a supported model name. Previously
            this case fell through the if/elif chain and failed with
            ``UnboundLocalError`` on the return statement.
    """
    global transcribe_text

    supported_models = ("gpt-3.5-turbo", "gpt-4-0125-preview", "gemini")
    # Validate first so an unsupported request never overwrites the stored
    # transcript.
    if LLMmodel not in supported_models:
        raise ValueError(
            f"Unsupported LLMmodel: {LLMmodel!r}; expected one of {supported_models}"
        )

    # Treat a missing textbox value like an empty one.
    prompt = prompt or ""
    inputscript = inputscript or ""

    if len(prompt) < 10:
        prompt = DEFAULTPROMPT
    # A non-trivial pasted transcript means the text did not come from speech
    # recognition, so it takes over as the current transcript.
    if len(inputscript) > 10:
        transcribe_text = inputscript

    if LLMmodel == "gpt-3.5-turbo":
        return gpt3write(apikeystr, transcribe_text, LLMmodel)
    if LLMmodel == "gpt-4-0125-preview":
        return gpt4write(apikeystr, transcribe_text, LLMmodel)
    return gewritenote(prompt, transcribe_text)
# Tab: generate meeting minutes via writenotes().
# Inputs map positionally to writenotes(LLMmodel, apikeystr, prompt,
# inputscript): model selector (default "gemini"), OpenAI API key, custom
# prompt (DEFAULTPROMPT is shown as the info text), and pasted transcript.
ainotes = gr.Interface(
    fn=writenotes,
    inputs=[ gr.inputs.Radio(["gemini","gpt-3.5-turbo", "gpt-4-0125-preview"], label="LLMmodel", default="gemini"),gr.Textbox(label="使用GPT請輸入OPEN AI API KEY",placeholder="請輸入sk..."),gr.Textbox(label="自訂提示詞(prompt)若無會用以下預設值",info=DEFAULTPROMPT),gr.Textbox(label="逐字稿",placeholder="若沒有做語音辨識,請輸入逐字稿")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="會議紀錄小幫手AINotes",
    # Description (Chinese): record or upload audio and leave the transcript
    # box empty, or paste an existing transcript into it; also names the model
    # and points to a whisper-jax Space as a faster alternative.
    description=(
        "可由麥克風錄音或上傳語音檔,並將本逐字稿欄位清空,若有逐字稿可以直接貼在逐字稿"
        f" 使用這個模型 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) 如果覺得速度有點慢, 可以用(https://huggingface.co/spaces/sanchit-gandhi/whisper-jax), 做完語音辨識再貼過來做會議紀錄摘要"
        " 長度沒有限制"
    ),
    # examples=[
    #     ["./example.flac", "transcribe", False],
    #     ["./example.flac", "transcribe", True],
    # ],
    # NOTE(review): cache_examples is enabled although the examples list above
    # is commented out — confirm whether this flag is still wanted.
    cache_examples=True,
    allow_flagging="never",
)

# Assemble the three interfaces as tabs (file transcription, microphone
# transcription, minutes generation) inside the Blocks container.
with demo:
    gr.TabbedInterface([file_transcribe,mic_transcribe,ainotes], ["語音檔辨識","麥克風語音檔辨識","產生會議紀錄" ])

# Start the app with enable_queue=True.
# NOTE(review): presumably this turns on Gradio request queuing — confirm the
# keyword is still accepted by the pinned Gradio version.
demo.launch(enable_queue=True)