""" | |
author: Elena Lowery | |
This code sample shows how to invoke Large Language Models (LLMs) deployed in watsonx.ai. | |
Documentation: https://ibm.github.io/watson-machine-learning-sdk/foundation_models.html | |
You will need to provide your IBM Cloud API key and a watonx.ai project id (any project) | |
for accessing watsonx.ai in a .env file | |
This example shows simple use cases without comprehensive prompt tuning | |
""" | |
# Install the WML API in your Python environment prior to running this example:
# pip install ibm-watson-machine-learning
# pip install ibm-cloud-sdk-core
# pip install python-dotenv
# pip install gradio

# For reading credentials from the .env file
import os
from dotenv import load_dotenv

# WML Python SDK
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

# For invocation of the LLM with the REST API (see the sketch after generate_response below)
import requests, json
from ibm_cloud_sdk_core import IAMTokenManager

# For creating the Gradio interface
import gradio as gr

# URL of the hosted LLMs is hardcoded because at this time all LLMs share the same endpoint
url = "https://us-south.ml.cloud.ibm.com"

# These global variables will be updated in the get_credentials() function
watsonx_project_id = ""
# Replace with your IBM Cloud API key
api_key = ""
def get_credentials():
    load_dotenv()
    # Update the global variables that will be used for authentication in another function
    globals()["api_key"] = os.getenv("api_key", None)
    globals()["watsonx_project_id"] = os.getenv("project_id", None)
# The get_model function creates an LLM model object with the specified parameters
def get_model(model_type, max_tokens, min_tokens, decoding, temperature):
    generate_params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature
    }
    model = Model(
        model_id=model_type,
        params=generate_params,
        credentials={
            "apikey": api_key,
            "url": url
        },
        project_id=watsonx_project_id
    )
    return model
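# Example usage (commented out because credentials are loaded at request time).
# The ModelTypes and DecodingMethods enums imported above can stand in for the
# raw strings passed elsewhere in this sample:
#
#   model = get_model(ModelTypes.FLAN_UL2.value, 300, 50,
#                     DecodingMethods.SAMPLE.value, 0.7)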
def generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature):
    model = get_model(model_type, max_tokens, min_tokens, decoding, temperature)
    generated_response = model.generate(prompt=prompt)
    return generated_response['results'][0]['generated_text']
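# The requests/json/IAMTokenManager imports above support calling the same models
# over plain REST instead of the SDK. The function below is a sketch, not part of
# the original flow: it assumes the v1-beta text generation route and version
# string; check the watsonx.ai docs for the endpoint your region expects.
def generate_response_rest(model_type, prompt, max_tokens, min_tokens, decoding, temperature):
    # Exchange the IBM Cloud API key for a short-lived IAM bearer token
    access_token = IAMTokenManager(apikey=api_key).get_token()
    headers = {
        "Authorization": "Bearer " + access_token,
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    payload = {
        "model_id": model_type,
        "input": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "min_new_tokens": min_tokens,
            "decoding_method": decoding,
            "temperature": temperature
        },
        "project_id": watsonx_project_id
    }
    # Assumed endpoint path/version; adjust to the current watsonx.ai REST API
    response = requests.post(url + "/ml/v1-beta/generation/text?version=2023-05-29",
                             headers=headers, json=payload)
    response.raise_for_status()
    return response.json()["results"][0]["generated_text"]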
def demo_LLM_invocation(prompt, model_type="google/flan-ul2", max_tokens=300, min_tokens=50, decoding="sample", temperature=0.7):
    get_credentials()
    response = generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature)
    return response
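# Example invocation (commented out; the prompt mirrors the one suggested in the
# Gradio description below):
#
#   print(demo_LLM_invocation("What is IBM?"))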
# Gradio interface
def gradio_interface(prompt):
    response = demo_LLM_invocation(prompt)
    return response

# Create a Gradio app
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="🌠 Test watsonx.ai LLM",
    description="Ask a question and get a response from an LLM hosted on watsonx.ai. For example: 'What is IBM?'"
)
if __name__ == "__main__":
    iface.launch()
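    # When hosting the app (e.g. on Hugging Face Spaces), launch() may need
    # server_name="0.0.0.0"; locally, launch(share=True) also prints a
    # temporary public URL. Both are standard Gradio launch() parameters.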