File size: 3,829 Bytes
027a338
 
 
 
 
 
2f9dcad
027a338
 
91f7830
 
3ac7c08
027a338
42dc391
 
027a338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91f7830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
027a338
3ac7c08
91f7830
3ac7c08
91f7830
027a338
91f7830
027a338
91f7830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
027a338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import gradio as gr
from dotenv import load_dotenv
import openai
from utils import compress
from google_manager.fassade import Fassade
from google.oauth2.credentials import Credentials
from description import DESCRIPTION
import gradio as gr
from utils import credentials_to_dict
import asyncio
import logging

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)

# Runs before the os.getenv() lookup below; presumably this is what makes
# values from a local .env file visible in the environment — see python-dotenv.
load_dotenv()

# configuring openai package
# os.getenv returns None when OPENAI_API_KEY is not set, in which case
# openai.api_key is assigned None here.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY


def load_prompt(path):
    """Return the full text of the prompt template stored at ``path``.

    Args:
        path: Filesystem path of the template file.

    Returns:
        The file's contents as a single string, line breaks included.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, encoding="utf-8") as f:
        return f.read()


def chat(passage, max_tokens=256, temprature=0, debug=False):
    """Summarise ``passage`` with the ``gpt-3.5-turbo`` chat model.

    The prompt is read from ``summary_prompt.txt``; every occurrence of the
    ``<<SUMMARY>>`` placeholder in it is replaced by ``passage``, and the
    result is sent as a single user message.

    Args:
        passage: Text to summarise.
        max_tokens: Upper bound on the number of tokens the model may
            generate for the reply.
        temprature: Sampling temperature forwarded to the API. The
            misspelled name is kept so existing keyword callers keep working.
        debug: When truthy, ``passage`` is replaced by a built-in sample
            paragraph before the prompt is built.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    if debug:
        passage = """
        A car or automobile is a motor vehicle with wheels. Most definitions of cars say that they run primarily on roads, seat one to eight people, have four wheels, and mainly transport people (rather than goods).
        """

    prompt = load_prompt("summary_prompt.txt").replace("<<SUMMARY>>", passage)

    # Forward the generation settings; previously they were accepted by this
    # function but never sent to the API, so callers' values had no effect.
    summary = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temprature,
    )

    return summary["choices"][0]["message"]["content"].strip()


def transcribe(audio_file):
    """Transcribe an audio file to English text with the ``whisper-1`` model.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The value of the ``"text"`` field of the transcription response.

    Raises:
        OSError: If the audio file cannot be opened.
    """
    # The context manager closes the file handle even if the API call raises;
    # the original left it open.
    with open(audio_file, "rb") as audio:
        transcription = openai.Audio.transcribe("whisper-1", audio, language="en")
    return transcription["text"]


# def predict(input, request: gr.Request, history=[]):
#     compress(input)
#     print("whisper starts")
#     transcription = transcribe(input)
#     print("whisper ends")
#     print("gpt starts")
#     answer = chat(transcription)
#     print("gpt ends")

#     # upload the input/answer to google drive
#     session_dict = vars(request.session)
#     if "credentials" in session_dict:
#         creds = Credentials(**vars(session_dict["credentials"]))

#         doc_content = "user:\n" f"{transcription}\n" "\n" "summary:\n" f"{answer}\n"
#         Fassade.upload_to_drive(creds, doc_content)
#         # request.session["credentials"] = credentials_to_dict(creds)
#         setattr(request.session, "credentials", credentials_to_dict(creds))


def _log_upload_failure(future):
    """Log the exception, if any, raised by a background Drive upload."""
    if future.cancelled():
        return
    error = future.exception()
    if error is not None:
        logging.error("Google Drive upload failed", exc_info=error)


async def predict(input, request: gr.Request, history=None):
    """Transcribe a recording, summarise it, and append the pair to history.

    Steps, in order: ``compress(input)`` is called, the file is transcribed
    and the transcription summarised (each in a worker thread), and — when
    the request's session carries a ``"credentials"`` entry — the
    transcription and summary are handed to ``Fassade.upload_to_drive`` in
    the background.

    Args:
        input: Path of the recorded audio file, as supplied by the UI.
        request: The Gradio request; its ``session`` attribute is inspected
            for stored credentials (presumably set by an auth flow defined
            elsewhere — verify against the hosting app).
        history: List of ``(transcription, answer)`` tuples accumulated so
            far. A new list is created when omitted.

    Returns:
        A 2-tuple ``(history, history)``: the same updated list, once for the
        chatbot display and once for the state component.
    """
    # A fresh list per call; a literal ``[]`` default would be shared by
    # every call that omits ``history``.
    if history is None:
        history = []

    compress(input)
    logging.info("Starting HTTP request to Whisper API")
    transcription = await asyncio.to_thread(transcribe, input)
    logging.info("Starting HTTP request to GPT-3.5 API")
    answer = await asyncio.to_thread(chat, transcription)

    # upload the input/answer to google drive
    session_dict = vars(request.session)
    if "credentials" in session_dict:
        creds = Credentials(**vars(session_dict["credentials"]))

        doc_content = "user:\n" f"{transcription}\n" "\n" "summary:\n" f"{answer}\n"
        # Fire-and-forget: the upload runs in the default executor so the
        # response is not delayed by it. Failures are logged by the callback
        # instead of being left on an unobserved future.
        loop = asyncio.get_running_loop()
        upload = loop.run_in_executor(
            None, Fassade.upload_to_drive, creds, doc_content
        )
        upload.add_done_callback(_log_upload_failure)
        setattr(request.session, "credentials", credentials_to_dict(creds))

    history.append((transcription, answer))
    return history, history


# Build the Gradio interface; the Blocks container is bound to the
# module-level name ``Ui``.
with gr.Blocks() as Ui:
    # Page description text, imported from the ``description`` module.
    gr.Markdown(DESCRIPTION)
    # Conversation display; receives the history list returned by ``predict``.
    chatbot = gr.Chatbot()
    # State component holding the history, initialised to an empty list.
    state = gr.State([])

    with gr.Row():
        # Microphone recorder; type="filepath" presumably means the handler
        # receives a path to the recording — confirm against Gradio docs.
        audio_file = gr.Audio(label="Audio", source="microphone", type="filepath")

    # On a change of the audio component, call ``predict`` with the audio
    # value and the current state; its two return values are routed to the
    # chatbot and back into the state.
    audio_file.change(predict, [audio_file, state], [chatbot, state])