Spaces:

harshp8l
/

transcriber-prompt

Runtime error

File size: 6,503 Bytes

9690d29

# USAGE: press `$` to record a voice prompt; each prompt is layered onto the running
# context that is sent to the model. Press `|` to type a prompt instead.
# Press the backtick key to exit once the instructions have been fulfilled.

# TODO: Stop recording automatically via a keystroke instead of a fixed duration; consider a Whisper model.
# MAKE SURE TO SPECIFY RECORD_SECONDS, remove comment and print statements, and add api key

import os
import pyaudio
import wave
from pynput import keyboard
import speech_recognition as sr
import time
import openai 
from bardapi import Bard
import google.generativeai as palm
# Credentials are read from the environment so that secrets never live in the
# source file. Export OPENAI_API_KEY and GOOGLE_API_KEY before running.
# NOTE: any key that was previously committed here should be treated as
# compromised and rotated.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Bard backend is currently disabled; if re-enabled, load its token the same way:
# bard = Bard(token=os.environ.get("BARD_TOKEN"))
palm.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Audio capture settings handed to the PyAudio input stream opened in on_press.
FORMAT = pyaudio.paInt16  # passed as format= when the stream is opened
CHANNELS = 2  # passed as channels= when the stream is opened
RATE = 44100  # passed as rate= to the stream and again to sr.AudioData
CHUNK = 1024  # frames_per_buffer, and the size of every stream.read() call
RECORD_SECONDS = 4                ##HERE## on_press reads int(RATE / CHUNK * RECORD_SECONDS) chunks per recording
#WAVE_OUTPUT_FILENAME = "2.wav"

# Raw audio chunks captured by on_press. The list is module-level and is only
# ever appended to, so later recordings are joined with earlier ones.
frames = []

def update_chat(messages, content):
    """Record *content* as a new user turn.

    The turn is added to *messages* in place, and that same list object is
    returned so the call can be used inline as an argument.
    """
    user_turn = dict(role="user", content=content)
    messages.append(user_turn)
    return messages

# messages = [
    # # include in step - by - step more (0-shot)
    # {"role": "system", "content": "You are a super helpful tutor and excellent interviewee. In general you explain your thought process and concepts very well. You first explain simple brute force solutions to interview problem (no need to code) but still go over the time and space complexity, then you explain the steps leading you to the most optimized solution.  You explain the concepts and procedures of this optimized solution step by step and then you MUST provide the final code in python with its time and space complexity."}
# ]
# Chat history for the chat-completion style backends; it starts with a single
# system turn describing the system-design interview persona.
# include in step - by - step more (0-shot)
messages = [
    dict(
        role="system",
        content="You are a super helpful tutor and excellent interviewee. In general you explain your thought process and concepts very well. You first explain simple brute force solutions to system design interview problem, then make appropriate assumptions and explain the steps leading you to the most optimized solution. Provide functional and non-functional requirements, back of the envelope calculation, apis needed, data/databases if needed, and a high level scalable design.  You MUST explain the concepts and procedures of this optimized solution step by step while providing the system design architecture",
    ),
]
# Bard keeps its own flat list of strings, seeded with the system prompt text.
bard_context = [messages[0]["content"]]

def update_bard(messages, content):
    """Send the accumulated context plus *content* to Bard and record the reply.

    Args:
        messages: list of strings holding the context so far; the reply is
            appended to it in place.
        content: the new prompt text.

    Returns:
        A ``(messages, ans)`` tuple: the same list object and the reply text.

    NOTE: this relies on a module-level ``bard`` client. Its construction is
    commented out in the visible file, so calling this raises ``NameError``
    until that client is created again.
    """
    # Put the same ': ' separator between the context and the new prompt that
    # update_palm uses; without it the last context entry and the prompt are
    # fused into one run of text.
    msg = ': '.join(messages) + ': ' + content
    ans = bard.get_answer(msg)['content']
    messages.append(ans)
    return messages, ans

# PaLM conversation context: a flat list of strings that starts with only the
# system prompt text; update_palm appends each reply to it.
palm_context = [messages[0]["content"]]

def update_palm(messages, content):
    """Send the accumulated context plus *content* to PaLM and record the reply.

    Args:
        messages: list of strings holding the context so far; a reply is
            appended to it in place.
        content: the new prompt text.

    Returns:
        A ``(messages, ans)`` tuple: the same list object and the reply text.
        ``ans`` is ``None`` when the API returns no result, in which case
        *messages* is left unchanged.
    """
    msg = ': '.join(messages) + ': ' + content
    completion = palm.generate_text(
        model='models/text-bison-001',
        prompt=msg,
        temperature=0,
        # The maximum length of the response
        max_output_tokens=2000,
    )
    ans = completion.result
    # Only record real replies: a None entry in the context would make the
    # ': '.join(messages) above raise TypeError on every later call.
    if ans is not None:
        messages.append(ans)
    return messages, ans

    #Line to add: [Let's think step by step.]

def on_press(key):
    """Handle a key press delivered by the keyboard listener.

    Keys handled:
        ``$``        record RECORD_SECONDS of audio, transcribe it with
                     Google speech recognition, send the text to PaLM and
                     print the reply.
        backtick     print "Exiting" and return False.
        ``|``        read a typed prompt from stdin, send it to PaLM and
                     print the reply.

    Any other key is ignored.

    Returns:
        False for the backtick key (the value pynput uses to stop a
        listener); None in every other case.
    """
    # Declared once, up front: both the voice and the typed branch rebind
    # palm_context, and stream/audio are kept as module globals as before.
    global stream, audio, palm_context

    if key == keyboard.KeyCode.from_char('$'):
        # Start recording
        print("Ready...", end=" ")
        audio = pyaudio.PyAudio()
        try:
            print("Recording audio...")
            stream = audio.open(format=FORMAT, channels=CHANNELS,
                                rate=RATE, input=True,
                                frames_per_buffer=CHUNK,
                                input_device_index=2)  # when brio is connected use mac
            try:
                for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
                    frames.append(stream.read(CHUNK))
            finally:
                # Stop recording; release the stream even if a read failed.
                stream.stop_stream()
                stream.close()
        finally:
            # Always release PortAudio, including when opening the stream fails.
            audio.terminate()
        print("Finished recording audio.")

        recognizer = sr.Recognizer()
        # `frames` is never cleared, so this transcribes every recording made
        # so far, not just the latest one.
        # NOTE(review): AudioData is given a sample width of 2 but no channel
        # count, while capture uses CHANNELS = 2 - confirm the recognizer
        # copes with that layout.
        audio_data = sr.AudioData(b''.join(frames), RATE, 2)
        try:
            text = recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Unintelligible audio must not take the whole listener down.
            print("Could not understand the audio; press $ to try again.")
            return None
        except sr.RequestError as err:
            print(f"Speech recognition request failed: {err}")
            return None
        print(text)  # REMOVE THIS DURING ACTUAL
        print("Finished transcription")
        # TODO: Once audio is fully done then send to gpt to answer via another keystroke.

        # PaLM is the active backend; OpenAI chat completion and Bard were
        # previously wired in at this point.
        palm_context, response = update_palm(palm_context, text)
        os.system('clear')
        print(response)

    elif key == keyboard.KeyCode.from_char('`'):
        print("Exiting")
        return False

    elif key == keyboard.KeyCode.from_char('|'):
        text = input()
        print("Completing")
        palm_context, response = update_palm(palm_context, text)
        print(response)

    return None

    
def on_release(key):
    """Return False when the backtick key is released, otherwise None.

    UNTESTED CODE. The listener in the visible file is created with
    on_press only, so this callback is not registered there.
    """
    exit_key = keyboard.KeyCode.from_char('`')
    return False if key == exit_key else None

# Create the keyboard listener, start it via the context manager, and block
# until it stops (on_press returns False for the backtick key).
listener = keyboard.Listener(on_press=on_press)
with listener:
    listener.join()




# from bardapi import Bard
# 
# #initialize via F12 Application → Cookies → Copy the value of __Secure-1PSID cookie.
# token = '<your __Secure-1PSID cookie value>'  # never commit the real cookie value
# bard = Bard(token=token)
# 
# # Setup state
# context = "You are a super helpful tutor and excellent interviewee. In general you explain your thought process and concepts very well. You first explain simple brute force solutions to interview problem (no need to code) but still go over the time and space complexity, then you explain the steps leading you to the most optimized solution.  You finally explain the concepts and procedures of this optimized solution step by step and then you MUST provide the final code in python with its time and space complexity."
# bard.get_answer(context)
# prompt = input('You: ')
# print()
# 
# # Continue chat 
# while len(prompt) != 0:
#     print(f"Bard: {bard.get_answer(prompt)['content']}\n")
#     prompt = input('You: ')
#     print()
# 
#