"""Minimal Gradio front-end for a Forefront CodeGen-16B completion endpoint."""
import os

import gradio as gr
import requests
import json  # NOTE(review): unused here, kept to avoid breaking unseen callers


# SECURITY: the bearer token was hard-coded in the original source. It is kept
# only as a fallback so existing behavior is unchanged; rotate the key and set
# FOREFRONT_API_KEY in the environment instead of shipping it in source.
API_KEY = os.environ.get("FOREFRONT_API_KEY", "325618b3f00d46a3bdb01340")

API_URL = (
    "https://shared-api.forefront.link/organization/GuejzaCOIXGT/"
    "codegen-16b-nl/completions/Gu6OxnDd8Tur"
)


def greet(data):
    """Send the prompt *data* to the completion API and return its 'result' text.

    Args:
        data: The prompt string typed into the Gradio text box.

    Returns:
        The 'result' field of the JSON response.

    Raises:
        requests.HTTPError: if the API responds with a 4xx/5xx status.
        requests.Timeout: if the request exceeds the 30-second timeout.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    body = {
        "text": data,
        "message": "",
        # Top-P (nucleus) sampling controls randomness/creativity. Provider
        # guidance: use only one of temperature / top_p; set the other to 1.
        # Top-P suits accuracy-oriented generation, temperature suits
        # creative generation (typical creative range 0.7-0.9).
        "top_p": 0.8,  # DEFAULT 0.8
        # Top-K: sort tokens by probability and zero out everything below the
        # k'th token; lower values trim the tail and reduce off-topic drift.
        "top_k": 40,  # DEFAULT 40
        # Temperature 0 makes the engine deterministic: identical input
        # always yields identical output.
        "temperature": 0.0,  # DEFAULT 0.0
        # Lowers the probability of words proportionally to how often they
        # have already appeared, discouraging repetition.
        "repetition_penalty": 1.0,  # DEFAULT 1.0
        "length": 300,
    }
    # timeout prevents the UI from hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors instead of an opaque KeyError.
    res = requests.post(API_URL, json=body, headers=headers, timeout=30)
    res.raise_for_status()
    payload = res.json()
    return payload["result"]


iface = gr.Interface(fn=greet, inputs="text", outputs="text")

if __name__ == "__main__":
    iface.launch()