# For reading credentials from the .env file
import os
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from chromadb.api.types import EmbeddingFunction
# WML python SDK
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods
import requests
from bs4 import BeautifulSoup
import spacy
import chromadb
import en_core_web_md
from utils import chromadb_client
# Module-level watsonx credentials and endpoint. They start out empty and are filled in
# at runtime: get_credentials() reads them from environment variables, and
# answer_questions_from_web() overwrites them with the values supplied by its caller.
# Do not hardcode real secrets here; keep them in an .env or a properties file instead.
watsonx_project_id = ""
# IBM Cloud API key
api_key = ""
# Service endpoint, e.g. "https://us-south.ml.cloud.ibm.com"
watsonx_url = ""
def get_credentials():
    """Load the watsonx credentials from the environment into the module globals.

    ``load_dotenv()`` is called first so that values defined in a local ``.env``
    file become visible through ``os.getenv``; the module imports ``load_dotenv``
    for that purpose but never invoked it.

    Reads the ``api_key``, ``project_id`` and ``watsonx_url`` environment
    variables. A variable that is not set leaves the matching global as ``None``.
    """
    # These globals are read later when the model object is built
    global api_key, watsonx_project_id, watsonx_url

    load_dotenv()

    api_key = os.getenv("api_key")
    watsonx_project_id = os.getenv("project_id")
    watsonx_url = os.getenv("watsonx_url")
# The get_model function creates an LLM model object with the specified parameters
def get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p):
    """Create a watsonx foundation-model object with sampling parameters.

    Args:
        model_type: Identifier of the model to use.
        max_tokens: Value for ``GenParams.MAX_NEW_TOKENS``.
        min_tokens: Value for ``GenParams.MIN_NEW_TOKENS``.
        decoding: Value for ``GenParams.DECODING_METHOD``.
        temperature: Value for ``GenParams.TEMPERATURE``.
        top_k: Value for ``GenParams.TOP_K``.
        top_p: Value for ``GenParams.TOP_P``.

    Returns:
        A ``Model`` built from the module-level ``api_key``, ``watsonx_url`` and
        ``watsonx_project_id`` values, so those must be populated beforehand.
    """
    generate_params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature,
        GenParams.TOP_K: top_k,
        GenParams.TOP_P: top_p,
    }

    # NOTE(review): only the "apikey"/"url" entries survived in the original text;
    # model_id, params and project_id were restored from the otherwise unused
    # inputs - confirm against the Model constructor of the installed SDK.
    model = Model(
        model_id=model_type,
        params=generate_params,
        credentials={
            "apikey": api_key,
            "url": watsonx_url,
        },
        project_id=watsonx_project_id,
    )

    return model
def get_model_test(model_type, max_tokens, min_tokens, decoding, temperature):
    """Create a watsonx foundation-model object without top-k / top-p settings.

    Same as ``get_model`` except that only the token limits, decoding method and
    temperature are placed in the generation parameters.

    Args:
        model_type: Identifier of the model to use.
        max_tokens: Value for ``GenParams.MAX_NEW_TOKENS``.
        min_tokens: Value for ``GenParams.MIN_NEW_TOKENS``.
        decoding: Value for ``GenParams.DECODING_METHOD``.
        temperature: Value for ``GenParams.TEMPERATURE``.

    Returns:
        A ``Model`` built from the module-level ``api_key``, ``watsonx_url`` and
        ``watsonx_project_id`` values.
    """
    generate_params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature,
    }

    # NOTE(review): only the "apikey"/"url" entries survived in the original text;
    # model_id, params and project_id were restored from the otherwise unused
    # inputs - confirm against the Model constructor of the installed SDK.
    model = Model(
        model_id=model_type,
        params=generate_params,
        credentials={
            "apikey": api_key,
            "url": watsonx_url,
        },
        project_id=watsonx_project_id,
    )

    return model
# Keep downloaded model files in a ".cache" folder under the current working directory
# (consider a user-defined location)
current_dir = os.getcwd()
cache_dir = os.path.join(current_dir, ".cache")
# Create the cache directory if necessary; exist_ok avoids an error when it is
# already there and removes the check-then-create race
os.makedirs(cache_dir, exist_ok=True)
# Set the Hugging Face cache directory
os.environ["HF_HOME"] = cache_dir
# Load the sentence-embedding model, storing its files in the cache directory
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(model_name, cache_folder=cache_dir)
# Print confirmation message
print(f"Model '{model_name}' downloaded and loaded from cache directory: {cache_dir}")
# Embedding function
class MiniLML6V2EmbeddingFunction(EmbeddingFunction):
    """Embedding function backed by the module-level sentence-transformer model."""

    # Shared by every instance: the model loaded once at import time
    MODEL = model

    def __call__(self, texts):
        """Encode ``texts`` with the shared model and return the result of ``tolist()``."""
        encoded = MiniLML6V2EmbeddingFunction.MODEL.encode(texts)
        return encoded.tolist()
def extract_text(url, timeout=30):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The paragraph texts joined by single spaces, with non-breaking spaces
        (``\\xa0``) replaced by regular spaces. ``None`` when the status code is
        not 200 or when any error occurs; the problem is printed in both cases.
    """
    try:
        # Send an HTTP GET request to the URL
        response = requests.get(url, timeout=timeout)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return None

        # Parse the HTML content of the page using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract contents of <p> elements
        p_contents = [p.get_text() for p in soup.find_all('p')]

        # Print the contents of <p> elements
        # NOTE(review): the loop body was missing from the original text; printing
        # each paragraph follows the comment and header line above - confirm.
        print("\nContents of <p> elements: \n")
        for content in p_contents:
            print(content)

        raw_web_text = " ".join(p_contents)
        # Remove \xa0, which HTML uses to keep words from breaking across lines
        cleaned_text = raw_web_text.replace("\xa0", " ")
        return cleaned_text
    except Exception as e:
        # Deliberately broad: extraction is best-effort and failures are only reported
        print(f"An error occurred: {str(e)}")
        return None
def split_text_into_sentences(text):
    """Split ``text`` into sentences with the spaCy ``en_core_web_md`` pipeline.

    Args:
        text: The text to split. May be ``None`` or empty, which is what
            ``extract_text`` yields when a page could not be retrieved.

    Returns:
        A list with one whitespace-stripped string per detected sentence, or an
        empty list when there is no text to process.
    """
    # Nothing to split; also avoids loading the pipeline and passing None to it
    if not text:
        return []

    nlp = spacy.load("en_core_web_md")
    doc = nlp(text)
    return [sent.text.strip() for sent in doc.sents]
def create_embedding(url, collection_name, client):
    """Fetch a web page, split it into sentences and store them in a collection.

    Args:
        url: Address of the page whose text should be indexed.
        collection_name: Name of the collection to get or create.
        client: Object providing ``get_or_create_collection`` (a ChromaDB client).

    Returns:
        The collection, populated with one document per sentence. Each document
        uses its position in the sentence list (as a string) both as its id and
        as its ``source`` metadata value.
    """
    cleaned_text = extract_text(url)
    # extract_text yields None on failure; treat that as "no sentences"
    cleaned_sentences = split_text_into_sentences(cleaned_text) if cleaned_text else []

    collection = client.get_or_create_collection(collection_name)

    # Skip the upload when there is nothing to store
    if not cleaned_sentences:
        return collection

    ids = [str(i) for i in range(len(cleaned_sentences))]

    # Upload text to chroma
    # NOTE(review): the call head and the documents argument were missing from the
    # original text. upsert is used because the ids repeat on every run for the
    # same collection - confirm this matches the intended behavior.
    collection.upsert(
        documents=cleaned_sentences,
        metadatas=[{"source": doc_id} for doc_id in ids],
        ids=ids,
    )

    return collection
def create_prompt_old(url, question, collection_name, client):
    """Build a generic one-sentence question-answering prompt from a web page.

    Indexes the page at ``url`` into the named collection, retrieves the chunks
    most relevant to ``question`` and places them ahead of the instructions.

    Args:
        url: Address of the page to use as the knowledge source.
        question: The question the model should answer.
        collection_name: Name of the collection used to store the page text.
        client: ChromaDB client passed through to ``create_embedding``.

    Returns:
        The prompt string.
    """
    # Create embeddings for the text file
    collection = create_embedding(url, collection_name, client)

    # Query relevant information
    # NOTE(review): the query arguments were missing from the original text. The
    # question is passed as the single query text and the number of results is
    # left at the library default - confirm the intended value.
    relevant_chunks = collection.query(query_texts=[question])
    context = "\n\n\n".join(relevant_chunks["documents"][0])

    # Please note that this is a generic format. You can change this format to be specific to llama
    # A space was added before "Question:" so it no longer runs into the previous sentence.
    prompt = (
        f"{context}\n\nPlease answer the following question in one sentence using this "
        "text. "
        "If the question is unanswerable, say \"unanswerable\". "
        "Do not include information that's not relevant to the question. "
        f"Question: {question}"
    )
    return prompt
def create_prompt(url, question, collection_name, client):
    """Build the instruction-style question-answering prompt from a web page.

    Indexes the page at ``url`` into the named collection, retrieves the chunks
    most relevant to ``question`` and embeds them in a Context / Instruction /
    Question template.

    Args:
        url: Address of the page to use as the knowledge source.
        question: The question the model should answer.
        collection_name: Name of the collection used to store the page text.
        client: ChromaDB client passed through to ``create_embedding``.

    Returns:
        The prompt string. If indexing or querying fails, an error message
        string is returned instead of a prompt; callers receive it in place of
        the prompt, so they should be prepared for that.
    """
    # Create embeddings for the text file
    try:
        collection = create_embedding(url, collection_name, client)
    except Exception as e:
        return f"Error creating embeddings: {e}"

    # Query relevant information
    # NOTE(review): the query arguments were missing from the original text. The
    # question is passed as the single query text and the number of results is
    # left at the library default - confirm the intended value.
    try:
        relevant_chunks = collection.query(query_texts=[question])
        context = "\n\n\n".join(relevant_chunks["documents"][0])
    except Exception as e:
        return f"Error querying the collection: {e}"

    # Create the prompt
    prompt = (
        "You are a helpful AI assistant.\n"
        f"### Context:\n{context}\n\n"
        "### Instruction:\n"
        "Please answer the following question based on the above context. Your answer should be concise and directly address the question. "
        "If the question is unanswerable based on the given context, respond with 'unanswerable'.\n\n"
        f"### Question:\n{question}\n"
    )
    return prompt
def main():
    """Run a sample question against a sample web page.

    Loads the credentials, creates a vector-store client and calls
    ``answer_questions_from_web`` with a fixed URL, question and collection name.
    """
    # Get the API key and project id and update global variables
    get_credentials()

    # Try different URLs and questions
    url = "https://www.usbank.com/financialiq/manage-your-household/buy-a-car/own-electric-vehicles-learned-buying-driving-EVs.html"
    question = "What are the incentives for purchasing EVs?"
    # question = "What is the percentage of driving powered by hybrid cars?"
    # question = "Can an EV be plugged in to a household outlet?"
    collection_name = "test_web_RAG"

    # NOTE(review): `client` was used but never defined in the original text. An
    # in-memory Chroma client is created here; the module also imports
    # `chromadb_client` from utils, which may be the intended source - confirm.
    client = chromadb.Client()

    answer_questions_from_web(api_key, watsonx_project_id, watsonx_url, url, question, collection_name, client)
def answer_questions_from_web(request_api_key, request_project_id, request_watsonx_url, url, question, collection_name, client):
    """Answer a question about a web page with a watsonx model.

    Stores the supplied credentials in the module globals, builds the model,
    creates a retrieval-augmented prompt from the page and returns the model's
    answer. The prompt and the answer are also printed.

    Args:
        request_api_key: API key to store in the module-level ``api_key``.
        request_project_id: Project id to store in ``watsonx_project_id``.
        request_watsonx_url: Endpoint URL to store in ``watsonx_url``.
        url: Address of the page to use as the knowledge source.
        question: The question to answer.
        collection_name: Name of the collection used to store the page text.
        client: ChromaDB client passed through to ``create_prompt``.

    Returns:
        The generated text with surrounding whitespace removed.
    """
    # Update the global variables read by get_model
    global api_key, watsonx_project_id, watsonx_url
    api_key = request_api_key
    watsonx_project_id = request_project_id
    watsonx_url = request_watsonx_url

    # Specify model parameters
    model_type = "meta-llama/llama-2-70b-chat"
    # model_type = "meta-llama/llama-3-70b-instruct"
    max_tokens = 100
    min_tokens = 50
    top_k = 50
    top_p = 1
    decoding = DecodingMethods.GREEDY
    temperature = 0.7

    # Get the watsonx model = try both options
    model = get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p)

    # Get the prompt; on failure create_prompt returns an error message string,
    # which is then passed on as the prompt
    complete_prompt = create_prompt(url, question, collection_name, client)

    # Let's review the prompt
    print("*** Prompt:" + complete_prompt + "***")

    generated_response = model.generate(prompt=complete_prompt)
    # Remove trailing white spaces
    response_text = generated_response['results'][0]['generated_text'].strip()

    # Print model response
    # NOTE(review): the line printing the response itself was missing from the
    # original text after the banner - restored here; confirm.
    print("--------------------------------- Generated response -----------------------------------")
    print(response_text)

    return response_text
# Invoke the main function when the file is run as a script
if __name__ == "__main__":
    main()