"""Minimal Gradio front-end for a Forefront CodeGen-16B completion endpoint."""
import os

import gradio as gr
import requests
import json  # NOTE(review): unused here, kept to avoid breaking unseen callers


# SECURITY: the bearer token was hard-coded in the original source. It is kept
# only as a fallback so existing behavior is unchanged; rotate the key and set
# FOREFRONT_API_KEY in the environment instead of shipping it in source.
API_KEY = os.environ.get("FOREFRONT_API_KEY", "325618b3f00d46a3bdb01340")

API_URL = (
    "https://shared-api.forefront.link/organization/GuejzaCOIXGT/"
    "codegen-16b-nl/completions/Gu6OxnDd8Tur"
)


def greet(data):
    """Send the prompt *data* to the completion API and return its 'result' text.

    Args:
        data: The prompt string typed into the Gradio text box.

    Returns:
        The 'result' field of the JSON response.

    Raises:
        requests.HTTPError: if the API responds with a 4xx/5xx status.
        requests.Timeout: if the request exceeds the 30-second timeout.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    body = {
        "text": data,
        "message": "",
        # Top-P (nucleus) sampling controls randomness/creativity. Provider
        # guidance: use only one of temperature / top_p; set the other to 1.
        # Top-P suits accuracy-oriented generation, temperature suits
        # creative generation (typical creative range 0.7-0.9).
        "top_p": 0.8,  # DEFAULT 0.8
        # Top-K: sort tokens by probability and zero out everything below the
        # k'th token; lower values trim the tail and reduce off-topic drift.
        "top_k": 40,  # DEFAULT 40
        # Temperature 0 makes the engine deterministic: identical input
        # always yields identical output.
        "temperature": 0.0,  # DEFAULT 0.0
        # Lowers the probability of words proportionally to how often they
        # have already appeared, discouraging repetition.
        "repetition_penalty": 1.0,  # DEFAULT 1.0
        "length": 300,
    }
    # timeout prevents the UI from hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors instead of an opaque KeyError.
    res = requests.post(API_URL, json=body, headers=headers, timeout=30)
    res.raise_for_status()
    payload = res.json()
    return payload["result"]


iface = gr.Interface(fn=greet, inputs="text", outputs="text")

if __name__ == "__main__":
    iface.launch()