"""Call API providers."""
import os
import random
import time
from fastchat.utils import build_logger
from fastchat.constants import WORKER_API_TIMEOUT
logger = build_logger("gradio_web_server", "gradio_web_server.log")

def openai_api_stream_iter(
    model_name,
    messages,
    temperature,
    top_p,
    max_new_tokens,
    api_base=None,
    api_key=None,
):
    # The legacy (pre-1.0) OpenAI SDK is assumed: `openai.api_base` and
    # `openai.ChatCompletion` were removed in openai>=1.0.
    import openai

    openai.api_base = api_base or "https://api.openai.com/v1"
    openai.api_key = api_key or os.environ["OPENAI_API_KEY"]

    # "gpt-4-turbo" is an alias; route it to the dated snapshot.
    if model_name == "gpt-4-turbo":
        model_name = "gpt-4-1106-preview"
    # gen_params is only used for logging; the real request is built below.
    gen_params = {
        "model": model_name,
        "prompt": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
    }
    logger.info(f"==== request ====\n{gen_params}")

    res = openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_new_tokens,
        stream=True,
    )
text = ""
for chunk in res:
text += chunk["choices"][0]["delta"].get("content", "")
data = {
"text": text,
"error_code": 0,
}
yield data
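
# A minimal consumption sketch (illustrative, not part of the module): the
# iterator yields cumulative text, so a caller only needs the latest value.
# The model name and message below are assumptions for the example.
#
#   stream = openai_api_stream_iter(
#       model_name="gpt-3.5-turbo",
#       messages=[{"role": "user", "content": "Hello!"}],
#       temperature=0.7,
#       top_p=1.0,
#       max_new_tokens=256,
#   )
#   for data in stream:
#       print(data["text"], end="\r")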


def anthropic_api_stream_iter(model_name, prompt, temperature, top_p, max_new_tokens):
    import anthropic

    # The legacy text-completions API is used, so `prompt` must already carry
    # anthropic.HUMAN_PROMPT / anthropic.AI_PROMPT turn markers.
    c = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    # gen_params is only used for logging; the real request is built below.
    gen_params = {
        "model": model_name,
        "prompt": prompt,
        "temperature": temperature,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
    }
    logger.info(f"==== request ====\n{gen_params}")

    res = c.completions.create(
        prompt=prompt,
        stop_sequences=[anthropic.HUMAN_PROMPT],
        max_tokens_to_sample=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        model=model_name,
        stream=True,
    )
text = ""
for chunk in res:
text += chunk.completion
data = {
"text": text,
"error_code": 0,
}
yield data
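
# A minimal consumption sketch (illustrative only), showing the prompt format
# the legacy completions endpoint expects; the model name is an assumption.
#
#   import anthropic
#
#   prompt = f"{anthropic.HUMAN_PROMPT} Hello!{anthropic.AI_PROMPT}"
#   for data in anthropic_api_stream_iter(
#       "claude-2", prompt, temperature=0.7, top_p=1.0, max_new_tokens=256
#   ):
#       print(data["text"], end="\r")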


def init_palm_chat(model_name):
    import vertexai  # pip3 install google-cloud-aiplatform
    from vertexai.preview.language_models import ChatModel

    project_id = os.environ["GCP_PROJECT_ID"]
    location = "us-central1"
    vertexai.init(project=project_id, location=location)

    chat_model = ChatModel.from_pretrained(model_name)
    chat = chat_model.start_chat(examples=[])
    return chat


def palm_api_stream_iter(chat, message, temperature, top_p, max_new_tokens):
    parameters = {
        "temperature": temperature,
        "top_p": top_p,
        "max_output_tokens": max_new_tokens,
    }
    # gen_params is only used for logging.
    gen_params = {
        "model": "palm-2",
        "prompt": message,
    }
    gen_params.update(parameters)
    logger.info(f"==== request ====\n{gen_params}")
    response = chat.send_message(message, **parameters)
    content = response.text

    # The PaLM chat API returns the full response at once, so streaming is
    # simulated: advance 10-20 characters per step and sleep for an
    # exponentially distributed interval (i.e. a Poisson process) to mimic
    # token-generation latency.
    pos = 0
    while pos < len(content):
        pos += random.randint(10, 20)
        time.sleep(random.expovariate(50))
        data = {
            "text": content[:pos],
            "error_code": 0,
        }
        yield data
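
# A minimal end-to-end sketch (illustrative only; assumes GCP_PROJECT_ID is
# set and Vertex AI access is configured; the model name is an assumption):
#
#   chat = init_palm_chat("chat-bison@001")
#   for data in palm_api_stream_iter(
#       chat, "Hello!", temperature=0.7, top_p=1.0, max_new_tokens=256
#   ):
#       print(data["text"], end="\r")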