rmdhirr's picture
Create app.py
99ca6b3 verified
"""
author: Elena Lowery
This code sample shows how to invoke Large Language Models (LLMs) deployed in watsonx.ai.
Documentation: https://ibm.github.io/watson-machine-learning-sdk/foundation_models.html
You will need to provide your IBM Cloud API key and a watsonx.ai project id (any project)
in a .env file (as `api_key` and `project_id`) in order to access watsonx.ai
This example shows simple use cases without comprehensive prompt tuning
"""
# Install the wml api in your Python environment prior to running this example:
# pip install ibm-watson-machine-learning
# pip install ibm-cloud-sdk-core
# pip install python-dotenv
# pip install gradio
# For reading credentials from the .env file
import os
from dotenv import load_dotenv
# WML python SDK
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods
# For invocation of LLM with REST API
import requests, json
from ibm_cloud_sdk_core import IAMTokenManager
# For creating Gradio interface
import gradio as gr
# URL of the hosted LLMs is hardcoded because at this time all LLMs share the same endpoint
url = "https://us-south.ml.cloud.ibm.com"
# The two globals below start out empty and are filled in by get_credentials();
# get_model() reads them when it builds the Model object.
# watsonx.ai project id, taken from the `project_id` environment variable
watsonx_project_id = ""
# IBM Cloud API key, taken from the `api_key` environment variable
api_key = ""
def get_credentials():
    """Load the IBM Cloud API key and watsonx.ai project id into module globals.

    Calls ``load_dotenv()`` and then copies the ``api_key`` and ``project_id``
    environment variables into the module-level ``api_key`` and
    ``watsonx_project_id`` variables, which ``get_model()`` uses for
    authentication.

    If an environment variable is not set, the value already stored in the
    corresponding global is kept rather than being replaced with ``None``, so
    a key or project id assigned directly in this file is not clobbered.
    """
    global api_key, watsonx_project_id

    load_dotenv()
    # Use the current global as the fallback so a missing variable never
    # wipes out a value that was configured in code.
    api_key = os.getenv("api_key", api_key)
    watsonx_project_id = os.getenv("project_id", watsonx_project_id)
# The get_model function creates an LLM model object with the specified parameters
def get_model(model_type, max_tokens, min_tokens, decoding, temperature):
    """Build a ``Model`` object configured with the given generation settings.

    Args:
        model_type: Model identifier, passed to ``Model`` as ``model_id``.
        max_tokens: Value stored under ``GenParams.MAX_NEW_TOKENS``.
        min_tokens: Value stored under ``GenParams.MIN_NEW_TOKENS``.
        decoding: Value stored under ``GenParams.DECODING_METHOD``.
        temperature: Value stored under ``GenParams.TEMPERATURE``.

    Returns:
        A ``Model`` created with the module-level ``api_key``, ``url`` and
        ``watsonx_project_id``.
    """
    # Text-generation parameters, keyed by the SDK's metaname constants.
    generation_settings = {}
    generation_settings[GenParams.MAX_NEW_TOKENS] = max_tokens
    generation_settings[GenParams.MIN_NEW_TOKENS] = min_tokens
    generation_settings[GenParams.DECODING_METHOD] = decoding
    generation_settings[GenParams.TEMPERATURE] = temperature

    # Authentication details come from the module globals that
    # get_credentials() populates.
    service_credentials = {"apikey": api_key, "url": url}

    return Model(
        model_id=model_type,
        params=generation_settings,
        credentials=service_credentials,
        project_id=watsonx_project_id,
    )
def generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature):
    """Send ``prompt`` to a freshly built model and return the generated text.

    Args:
        model_type: Model identifier forwarded to ``get_model()``.
        prompt: Text passed to the model's ``generate()`` call.
        max_tokens: Forwarded to ``get_model()``.
        min_tokens: Forwarded to ``get_model()``.
        decoding: Forwarded to ``get_model()``.
        temperature: Forwarded to ``get_model()``.

    Returns:
        The ``generated_text`` field of the first entry in the response's
        ``results`` list.
    """
    llm = get_model(model_type, max_tokens, min_tokens, decoding, temperature)
    raw_output = llm.generate(prompt=prompt)
    # Only the first result of the response is used.
    first_result = raw_output['results'][0]
    return first_result['generated_text']
def demo_LLM_invocation(prompt, model_type="google/flan-ul2", max_tokens=300, min_tokens=50, decoding="sample", temperature=0.7):
    """Refresh credentials, then generate a response for ``prompt``.

    Args:
        prompt: Text to send to the model.
        model_type: Model identifier; defaults to ``"google/flan-ul2"``.
        max_tokens: Maximum number of new tokens; defaults to 300.
        min_tokens: Minimum number of new tokens; defaults to 50.
        decoding: Decoding method; defaults to ``"sample"``.
        temperature: Sampling temperature; defaults to 0.7.

    Returns:
        Whatever ``generate_response()`` returns for these arguments.
    """
    # Credentials are (re)loaded on every call, before the model is built.
    get_credentials()
    return generate_response(
        model_type=model_type,
        prompt=prompt,
        max_tokens=max_tokens,
        min_tokens=min_tokens,
        decoding=decoding,
        temperature=temperature,
    )
# Gradio interface
def gradio_interface(prompt):
    """Callback for the Gradio app: answer ``prompt`` using the default settings.

    Args:
        prompt: Text entered by the user in the Gradio text input.

    Returns:
        The result of ``demo_LLM_invocation(prompt)``.
    """
    return demo_LLM_invocation(prompt)
# Create a Gradio app
iface = gr.Interface(
    # Text shown at the top of the page.
    title="🌠 Test watsonx.ai LLM",
    description="Ask a question and get a response from the IBM Watson LLM. For example: 'What is IBM?'",
    # One text box in, one text box out, wired to gradio_interface().
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()