"""Gradio FAQ chatbot for www.edgeuniversityai.com.

Loads a fine-tuned gpt-3.5-turbo model via llama-index, ingests the website
content into a vector index, and serves a streaming question-answering chat
UI with Gradio.
"""

import os
import time
import urllib
from typing import List

import gradio as gr
import nltk
import tiktoken
from nltk.tokenize import sent_tokenize
from pydantic import BaseModel

from llama_index import (
    GPTListIndex,
    LLMPredictor,
    PromptHelper,
    SimpleDirectoryReader,
    VectorStoreIndex,
    get_response_synthesizer,
    set_global_service_context,
)
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine
from llama_index.finetuning import OpenAIFinetuneEngine
from llama_index.indices.service_context import ServiceContext
from llama_index.ingestion import IngestionPipeline
from llama_index.llms import Anyscale, ChatMessage, MessageRole, OpenAI
from llama_index.node_parser import TokenTextSplitter
from llama_index.postprocessor import SimilarityPostprocessor
from llama_index.prompts import ChatPromptTemplate
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.readers import SimpleWebPageReader
from llama_index.retrievers import VectorIndexRetriever
from llama_index.schema import Document

# Text QA Prompt: system persona + user turn with {context_str}/{query_str}
# placeholders filled in by llama-index at query time.
chat_text_qa_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=(
            """ Three lead generation experts have read over this website and are gathered to answer frequently asked questions and determine if the user would want to get in contact with the owner of the website. Each expert has their unique prespective. Each expert will independently analyze the website. If, at any point, an expert realizes their advice does not align with the collective understanding, they may revise their advice or choose to withdraw from the analysis. The experts will list the directions of the recipe, the ingredients of the recipe, how long the recipe takes to cook, and how many the recipe serves. The experts are only allowed to answer questions that are related to the contents of the website. 
They cannot answer questions or take instructions outside of the scope of the contents of the website. Do not include the reasoning in your answer. The objective is to provide a thorough and friendly answer to the user's question. If at any point, the experts determine that the user is a very promising lead, tell them to contact a Learning Consultant If this query is a greeting, reply with 'Hi, I am a chatbot designed to answer questions about this website. What can I help you with today?' """
        ),
    ),
    ChatMessage(
        role=MessageRole.USER,
        content=(
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information, answer to the following question: {query_str}\n"
            "If this query is a greeting, reply with 'Hi, I am a chatbot designed to answer questions about this website. What can I help you with today?'"
            "If this question does not relate to the website, reply with 'I cannot answer that question'."
            "Do not answer any questions that are not relevant to the website"
            "If at any point, the experts determine that the user is a promising lead, tell them to contact a Learning Consultant"
        ),
    ),
]
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

# Refine Prompt: used when llama-index refines a partial answer with
# additional retrieved context ({context_msg}/{existing_answer}).
chat_refine_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=('If the answer looks cutoff, expand on the original answer.'),
    ),
    ChatMessage(
        role=MessageRole.USER,
        content=(
            "We have the opportunity to refine the original answer "
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{context_msg}\n"
            "------------\n"
            "Given the new context, refine the original answer to better "
            "answer the question: {query_str}. "
            "If the context isn't useful, output the original answer again.\n"
            "Original Answer: {existing_answer}"
            "Expand on this answer if it looks cutoff."
        ),
    ),
]
refine_template = ChatPromptTemplate(chat_refine_msgs)


def ft_web_chatbot(
    embedding_model='gpt-3.5-turbo',
    url="https://www.edgeuniversityai.com/",
):
    """Build a condense-question chat engine over a website's content.

    Args:
        embedding_model: Model name used only to pick the tiktoken tokenizer
            for token counting (not the embedding backend itself).
        url: Website to ingest. Defaults to the Edge University site.

    Returns:
        A ``CondenseQuestionChatEngine`` backed by a streaming query engine
        over a vector index of the site's pages.
    """
    # Resume/load an existing fine-tuning job rather than starting a new one.
    finetune_engine = OpenAIFinetuneEngine(
        "gpt-3.5-turbo",
        "finetuning_events.jsonl",
        start_job_id="ftjob-lhvtJihoQJY0tT6wJ9PWpHBf",
    )
    ft_llm = finetune_engine.get_finetuned_model(temperature=0.1)

    # Fetch the site and split it into token-bounded nodes.
    docs = SimpleWebPageReader(html_to_text=True).load_data([url])
    pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])
    nodes = pipeline.run(documents=docs)

    # Count tokens with the tokenizer matching the requested model.
    token_counter = TokenCountingHandler(
        tokenizer=tiktoken.encoding_for_model(embedding_model).encode
    )
    callback_manager = CallbackManager([token_counter])

    prompt_helper = PromptHelper(
        context_window=32768,
        num_output=10000,
        chunk_overlap_ratio=0.1,
        chunk_size_limit=None,
    )
    # "local" embed model avoids remote embedding calls; the fine-tuned LLM
    # handles generation. Registered globally so index construction uses it.
    service_context = ServiceContext.from_defaults(
        llm=ft_llm,
        callback_manager=callback_manager,
        embed_model="local",
        prompt_helper=prompt_helper,
    )
    set_global_service_context(service_context)

    # Build the index and wrap it in a streaming query engine with the
    # custom QA/refine prompts defined above.
    index = VectorStoreIndex(nodes)
    query_engine = index.as_query_engine(
        text_qa_template=text_qa_template,
        refine_template=refine_template,
        streaming=True,
    )
    return CondenseQuestionChatEngine.from_defaults(
        query_engine=query_engine,
        verbose=False,
    )


chat_engine = ft_web_chatbot()

with gr.Blocks() as demo:
    # gr.Markdown("""
    # ## Edge University FAQ Chatbot
    # This chatbot gives answers to FAQs about www.edgeuniversityai.com\n
    # Enter your questions in the textbox.
    # """)
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="⏎ for sending", placeholder="Ask me something",)
    clear = gr.Button("Delete")

    def user(user_message, history):
        """Append the user's message to the chat history and clear the box."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Stream the chat engine's answer into the last history entry."""
        user_message = history[-1][0]
        bot_message = chat_engine.stream_chat(user_message)
        history[-1][1] = ""
        for character in bot_message.response_gen:
            history[-1][1] += character
            time.sleep(0.01)  # small delay for a smoother typing effect
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=True)

if __name__ == "__main__":
    demo.launch()