ChatGPTwithAPI

Sleeping

File size: 5,413 Bytes

bc5816a
 
 
 
 
12689e0
9e5ac4f
bc5816a
 
95f266c
 
bc5816a
95f266c
bc5816a
 
75d4f46
bc5816a
95f266c
 
bc5816a
 
 
 
 
 
 
 
 
 
 
95f266c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75d4f46
95f266c
 
 
 
 
 
 
 
 
 
 
bc5816a
95f266c
bc5816a
 
 
 
 
 
 
 
70fc8f6
bc5816a
 
 
70fc8f6
bc5816a
70fc8f6
8b15b39
8a41c7d
aa8bc3d
8b15b39
 
df2fe5f
8b15b39
 
 
 
 
 
 
8a41c7d
bc5816a
 
 
 
b9026b3
bc5816a
 
 
 
 
 
 
 
95f266c
bc5816a
3053875
 
ed93fa4
3053875
 
 
 
bc5816a
64b40fc
d7204b9
495f3ef
d7204b9
01ecfe0
c1e194e
5fd8152
bc5816a
 
 
5e1e0d1
 
97dfe6e
 
95f266c
bc5816a
97dfe6e
 
bc5816a
 
 
97dfe6e
bc5816a
04a4fb7

import gradio as gr
import os 
import json 
import requests


# OpenAI Chat Completions endpoint that every request in this module is sent to.
# The API key is not read from the environment here; it is passed in per request.
API_URL = "https://api.openai.com/v1/chat/completions"

# Model requested from the Chat Completions endpoint.
_MODEL = "gpt-4-1106-preview"
# Framing of the streamed response: each event is a line "data: <json>",
# and the stream is terminated by "data: [DONE]".
_SSE_PREFIX = "data:"
_SSE_DONE = "[DONE]"
# (connect, read) timeouts in seconds so a stalled connection cannot block forever.
_REQUEST_TIMEOUT = (10, 120)


def _build_messages(chatbot, inputs):
    """Turn the prior (user, assistant) pairs plus the new input into API messages."""
    messages = []
    for turn in chatbot:
        user_text, assistant_text = turn[0], turn[1]
        # Skip missing halves of a turn rather than sending a null "content".
        if user_text is not None:
            messages.append({"role": "user", "content": user_text})
        if assistant_text is not None:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": inputs})
    return messages


def _error_detail(response):
    """Return the API's error message for a failed response, or its raw body."""
    try:
        return response.json()["error"]["message"]
    except (ValueError, KeyError, TypeError):
        return response.text


def _delta_content(data):
    """Return the text fragment carried by one streamed event, or None if it has none."""
    try:
        event = json.loads(data)
    except ValueError:
        return None
    try:
        return event["choices"][0]["delta"].get("content")
    except (KeyError, IndexError, TypeError, AttributeError):
        # Events without a choices/delta structure carry no text to display.
        return None


def predict(inputs, top_p, temperature, openai_api_key, chat_counter, chatbot=None, history=None):  #repetition_penalty, top_k
    """Stream a chat completion for ``inputs`` and yield the growing conversation.

    The request contains every turn in ``chatbot`` followed by ``inputs`` and is
    sent to ``API_URL`` with ``stream=True``. Each time a new text fragment
    arrives, the accumulated reply is stored as the last entry of the history
    and the updated conversation is yielded.

    Args:
        inputs: The new user message.
        top_p: Nucleus-sampling value forwarded to the API on every turn.
        temperature: Sampling temperature forwarded to the API on every turn.
        openai_api_key: Key sent as the ``Authorization: Bearer`` header.
        chat_counter: Number of turns so far; incremented by one per call.
        chatbot: Prior turns as ``(user, assistant)`` pairs. Defaults to empty.
        history: Flat list alternating user and assistant messages. It is
            copied, so the caller's list is only replaced through the yielded
            value and a failed request cannot leave an unanswered entry in it.

    Yields:
        ``(chat, history, chat_counter)`` where ``chat`` is a list of
        ``(user, assistant)`` tuples built from ``history``.

    Raises:
        gr.Error: If the API cannot be reached, the connection fails while
            streaming, or the API answers with a non-200 status.
    """
    chatbot = [] if chatbot is None else chatbot
    history = [] if history is None else list(history)

    payload = {
        "model": _MODEL,
        "messages": _build_messages(chatbot, inputs),
        "temperature": temperature,
        "top_p": top_p,
        "n": 1,
        "stream": True,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }

    print(f"chat_counter - {chat_counter}")
    chat_counter += 1
    history.append(inputs)
    print(f"payload is - {payload}")

    partial_words = ""
    reply_started = False
    # The try spans the whole exchange because network errors can surface both
    # when opening the connection and later while iterating the stream.
    try:
        # The context manager releases the streamed connection even if the
        # consumer stops iterating early or an error is raised.
        with requests.post(
            API_URL,
            headers=headers,
            json=payload,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as response:
            if response.status_code != 200:
                raise gr.Error(
                    f"OpenAI API request failed ({response.status_code}): {_error_detail(response)}"
                )

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue  # blank separator between events
                line = raw_line.decode("utf-8")
                if not line.startswith(_SSE_PREFIX):
                    continue
                data = line[len(_SSE_PREFIX):].strip()
                if data == _SSE_DONE:
                    break

                token = _delta_content(data)
                if not token:
                    continue  # role-only or empty delta: nothing new to show

                partial_words += token
                if reply_started:
                    history[-1] = partial_words
                else:
                    history.append(partial_words)
                    reply_started = True

                # Pair up (user, assistant) entries of the flat history.
                chat = list(zip(history[::2], history[1::2]))
                yield chat, history, chat_counter  # resembles {chatbot: chat, state: history}
    except requests.RequestException as exc:
        raise gr.Error(f"Could not complete the OpenAI API request: {exc}") from exc
                   

def reset_textbox():
    """Build a Gradio update that sets the output component's value to an empty string."""
    cleared = gr.update(value='')
    return cleared

# Page heading rendered as raw HTML at the top of the app.
title = """<h1 align="center">🔥ChatGPT-4 Turbo API 🚀Streaming🚀</h1>"""

# Introductory Markdown text; names the model that the app actually requests.
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of a gpt-4-1106-preview (GPT-4 Turbo) LLM.
"""

# Custom CSS: centers the main column and makes the chat area scroll at a fixed height.
css = """
#col_container {width: 1000px; margin-left: auto; margin-right: auto;}
#chatbot {height: 520px; overflow: auto;}
"""

# Build the UI: API-key box, chat window, input box, run button and a collapsed
# panel with the sampling parameters.
with gr.Blocks(css=css) as demo:
    gr.HTML(title)
    with gr.Column(elem_id="col_container"):
        openai_api_key = gr.Textbox(type='password', label="Insira sua chave de API OpenAI aqui")
        chatbot = gr.Chatbot(elem_id="chatbot")
        inputs = gr.Textbox(placeholder="Olá!", label="Digite uma entrada e pressione Enter", lines=3)
        state = gr.State([])  # flat conversation history handed to predict
        b1 = gr.Button(value="Executar", variant="primary")

        with gr.Accordion("Parameters", open=False):
            top_p = gr.Slider(minimum=0, maximum=1.0, value=1.0, step=0.05, interactive=True, label="Top-p (nucleus sampling)")
            # The Chat Completions API only accepts temperatures between 0 and 2,
            # so the slider must not offer values the API would reject.
            temperature = gr.Slider(minimum=0, maximum=2.0, value=1.0, step=0.1, interactive=True, label="Temperature")
            # Hidden turn counter passed through predict.
            chat_counter = gr.Number(value=0, visible=False, precision=0)

    # Pressing Enter in the textbox and clicking the button trigger the same
    # handlers: run predict, and clear the input box.
    predict_inputs = [inputs, top_p, temperature, openai_api_key, chat_counter, chatbot, state]
    predict_outputs = [chatbot, state, chat_counter]
    inputs.submit(predict, predict_inputs, predict_outputs)
    b1.click(predict, predict_inputs, predict_outputs)
    b1.click(reset_textbox, [], [inputs])
    inputs.submit(reset_textbox, [], [inputs])

    # gr.Markdown(description)  # introductory text, currently disabled

# Launch once the layout is complete; the queue is enabled because predict is a
# generator that streams partial results.
demo.queue().launch(debug=True)