Spaces:

gfhayworth
/

hopt

Paused

App Files Files Community

gfhayworth committed on Mar 3, 2023

Commit

4d7e790

1 Parent(s): 88f055d

Upload 2 files

Browse files

Files changed (2) hide show

app.py +311 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,311 @@

+# -*- coding: utf-8 -*-
+#!pip install gradio
+#!pip install -U sentence-transformers
+#!pip install langchain
+#!pip install openai
+#!pip install -U chromadb
+import gradio as gr
+from sentence_transformers import SentenceTransformer, CrossEncoder, util
+from langchain.llms import OpenAI
+from langchain.docstore.document import Document
+from langchain.prompts import PromptTemplate
+from langchain.chains.question_answering import load_qa_chain
+from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+from langchain import LLMMathChain, SQLDatabase, SQLDatabaseChain, LLMChain
+from langchain.agents import initialize_agent, Tool
+# import sqlite3
+import pandas as pd
+import json
+import chromadb
+import os
+# cxn = sqlite3.connect('./data/mbr.db')
+"""# import models"""
+bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')  # sentence-embedding model used to embed every query sent to the vector db
+bi_encoder.max_seq_length = 256     # Truncate long passages to 256 tokens
+# The bi-encoder embeds each query for retrieval of the top_k documents. A cross-encoder could re-rank that list to improve quality, but re-ranking is not used in this app, so the model below stays commented out.
+#cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+"""# setup vector db
+- chromadb
+- https://docs.trychroma.com/getting-started
+"""
+from chromadb.config import Settings
+chroma_client = chromadb.Client(settings=Settings(  # client configured from the two settings below
+    chroma_db_impl="duckdb+parquet",
+    persist_directory="./data/mychromadb/" # Optional, defaults to .chromadb/ in the current directory
+))
+#!ls ./data/mychromadb/
+#collection = chroma_client.create_collection(name="benefit_collection")
+collection = chroma_client.get_collection(name="healthy_opt_collection", embedding_function=bi_encoder)  # looks up a collection by name rather than creating one. NOTE(review): rtrv/vdb_qry pass precomputed query_embeddings, so embedding_function may never be invoked - confirm a SentenceTransformer object is an acceptable value here
+"""### vector db search examples"""
+def rtrv(qry,top_k=20):
+  results = collection.query(
+    query_embeddings=[ bi_encoder.encode(qry) ],
+    n_results=top_k,
+    )
+  return results
+def vdb_qry(qry,top_k=10):
+  results = collection.query(
+    query_embeddings=[ bi_encoder.encode(qry) ],
+    n_results=top_k,
+    include=["metadatas", "documents", "distances","embeddings"]
+    )
+  rslt_pd = pd.DataFrame(results ).explode(['ids','documents', 'metadatas', 'distances', 'embeddings'])
+  rslt_fmt = pd.concat([rslt_pd.drop(['metadatas'], axis=1), rslt_pd['metadatas'].apply(pd.Series)], axis=1 )
+  return rslt_fmt
+# qry = 'what should I do with my old card'
+# rslt_fmt = vdb_qry(qry, top_k=10)
+# rslt_fmt
+# doc_lst = rslt_fmt[['documents']].values.tolist()
+# len(doc_lst)
+## important to do this if you want to save the data for re-use
+# chroma_client.persist()
+"""# Introduction
+- example of the kind of question answering that is possible with this tool
+- assumes we are answering for a member with a Healthy Options Card
+*When will I get my card?*
+# semantic search functions
+"""
+## choosing not to use rerank for this use case
+# def rernk(query, collection=collection, top_k=20, top_n = 5):
+#   rtrv_rslts = rtrv(query, top_k=top_k)
+#   rtrv_ids = rtrv_rslts.get('ids')[0]
+#   rtrv_docs = rtrv_rslts.get('documents')[0]
+#   ##### Re-Ranking #####
+#   cross_inp = [[query, doc] for doc in rtrv_docs]
+#   cross_scores = cross_encoder.predict(cross_inp)
+#   # Sort results by the cross-encoder scores
+#   combined = list(zip(rtrv_ids, list(cross_scores)))
+#   sorted_tuples = sorted(combined, key=lambda x: x[1], reverse=True)
+#   sorted_ids = [t[0] for t in sorted_tuples[:top_n]]
+#   predictions = collection.get(ids=sorted_ids, include=["documents","metadatas"])
+#   return predictions
+#   #return cross_scores
+def get_text_fmt(qry):
+  prediction_text = []
+  predictions = rtrv(qry, top_k = 5)
+  docs = predictions['documents'][0]
+  meta = predictions['metadatas'][0]
+  for i in range(len(docs)):
+    result = Document(page_content=docs[i], metadata=meta[i])
+    prediction_text.append(result)
+  return prediction_text
+# get_text_fmt('can I buy fish?')
+"""# LLM based qa functions"""
+llm = OpenAI(temperature=0)  # single LLM instance shared by chain_qa, greet_llm and the agent; temperature=0 presumably chosen for repeatable answers
+# default model used when model_name is not passed (author's note):
+# model_name: str = "text-davinci-003"
+# instruction fine-tuned, sometimes referred to as GPT-3.5
+template = """You are a friendly AI assistant for the insurance company Humana.
+Given the following extracted parts of a long document and a question, create a succinct final answer.
+If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+If the question is not about Humana or what you can buy with the card, politely inform the user that you are tuned to only answer questions about Humana Healthy Options.
+QUESTION: {question}
+=========
+{summaries}
+=========
+FINAL ANSWER:"""
+PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])  # the two names match the {summaries} and {question} placeholders in the template above
+chain_qa = load_qa_with_sources_chain(llm=llm, chain_type="stuff", prompt=PROMPT, verbose=False)  # called by get_llm_response with "input_documents" and "question"
+def get_llm_response(message):
+  mydocs = get_text_fmt(message)
+  responses = chain_qa({"input_documents":mydocs, "question":message})
+  return responses
+# rslt = get_llm_response('can I buy shrimp?')
+# rslt['output_text']
+# for d in rslt['input_documents']:
+#   print(d.page_content)
+#   print(d.metadata['url'])
+# rslt['output_text']
+"""# Database query"""
+# db = SQLDatabase.from_uri("sqlite:///./data/mbr.db")
+# db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True, return_intermediate_steps=True)
+# def db_qry(qry):
+  # responses = db_chain('my mbr_id is 456 ;'+str(qry) ) ############### hardcode mbr id 456 for demo
+  # return responses
+# r = db_qry('how many footcare visits have I had?')
+# r['intermediate_steps']
+"""# Math
+- default version
+"""
+# llm_math_chain = LLMMathChain(llm=llm, verbose=True)
+# llm_math_chain.run('what is the square root of 49?')
+"""# Greeting"""
+template = """You are an AI assistant for the insurance company Humana.
+Your name is Jarvis and you were created on February 13, 2020.
+Offer polite, friendly greetings and brief small talk.
+Respond to thanks with, 'Glad to help.'
+If the question is not about Humana, politely guide the user to ask questions about Humana Healthy Options benefits.
+QUESTION: {question}
+=========
+FINAL ANSWER:"""
+greet_prompt = PromptTemplate(template=template, input_variables=["question"])  # `template` is rebound above; PROMPT was already built from its earlier value
+greet_llm = LLMChain(prompt=greet_prompt, llm=llm, verbose=True)  # chain whose .run is registered as the "Greeting" tool
+greet_llm.run('will it snow in Lousiville tomorrow')  # NOTE(review): executes at import time and the result is discarded - looks like a leftover smoke test; consider removing
+greet_llm.run('Thanks, that was great')  # NOTE(review): same as the line above - result unused
+"""# MRKL Chain"""
+tools = [  # tools handed to the agent; the description text is presumably what the agent uses to choose between them
+    Tool(
+        name = "Benefit",
+        func=get_llm_response,  # retrieval + QA chain; returns the chain's response rather than a plain string
+        description='''Useful for confirming what items can be bought with the healthy options card.
+        Useful for when you need to answer questions about healthy options allowance.
+        You should ask targeted questions'''
+    ),
+    # Tool(
+    #     name="Calculator",
+    #     func=llm_math_chain.run,
+    #     description="useful for when you need to answer questions about math"
+    # ),
+    # Tool(
+    #     name="Member DB",
+    #     func=db_qry,
+    #     description='''useful for when you need to answer questions about member details such their name, id and accumulated use of services.
+    #     This tool shows how much a benfit has already been consumed.
+    #     Input should be in the form of a question containing full context'''
+    # ),
+    Tool(
+        name="Greeting",
+        func=greet_llm.run,  # greeting / small-talk chain
+        description="useful for when you need to respond to greetings, thanks, make small talk or answer questions about yourself"
+    ),
+]
+mrkl = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=False, return_intermediate_steps=True, max_iterations=5, early_stopping_method="generate")  # return_intermediate_steps=True supplies the 'intermediate_steps' entry that get_cot reads
+def mrkl_rspnd(qry):
+  response = mrkl({"input":str(qry) })
+  return response
+# r = mrkl_rspnd("can I buy fish with the card?")
+# print(r['output'])
+# print(json.dumps(r['intermediate_steps'], indent=2))
+#r['intermediate_steps']
+# r.keys()
+# from IPython.core.display import display, HTML
+def get_cot(r):
+  cot = '<p>'
+  try:
+    intermedObj = r['intermediate_steps']
+    cot +='<b>Input:</b> '+r['input']+'<br>'
+    for agnt_action, obs in intermedObj:
+      al = '<br>  '.join(agnt_action.log.split('\n') )
+      cot += '<b>AI chain of thought:</b> '+ al +'<br>'
+      if type(obs) is dict:
+        if obs.get('input_documents') is not None: #### this criteria doesn't work
+          for d in obs['input_documents']:
+            cot += '&nbsp;&nbsp;&nbsp;&nbsp;'+'<i>- '+str(d.page_content)+'</i>'+' <a href="'+ str(d.metadata['url']) +'">'+str(d.metadata['page'])+'</a> '+'<br>'
+          cot += '<b>Observation:</b> '+str(obs['output_text']) +'<br><br>'
+        elif obs.get('intermediate_steps') is not None:
+          cot += '<b>Query:</b> '+str(obs.get('intermediate_steps')) +'<br><br>'
+        else:
+          pass
+      else:
+        cot += '<b>Observation:</b> '+str(obs) +'<br><br>'
+  except:
+    pass
+  cot += '</p>'
+  return cot
+# cot = get_cot(r)
+# display(HTML(cot))
+"""# chat example"""
+def chat(message, history):
+  history = history or []
+  message = message.lower()
+  response = mrkl_rspnd(message)
+  cot = get_cot(response)
+  history.append((message, response['output']))
+  return history, history, cot
+css=".gradio-container {background-color: lightgray}"  # stylesheet passed to gr.Blocks below
+xmpl_list = ["How do I activate my spending account card?",  # sample questions offered through gr.Examples
+                      "Can I use my card for copays at the doctor?",
+                      "Can I get fish with this card?",
+                      "Can I buy vitamins?",
+                      "Can I use this card with Uber?"]
+with gr.Blocks(css=css) as demo:
+  history_state = gr.State()  # holds the (message, answer) list that chat() receives and returns
+  response_state = gr.State()  # NOTE(review): not wired to any event in this file - appears unused
+  gr.Markdown('# Hack QA')
+  title='Benefit Chatbot'  # NOTE(review): assigned but not referenced anywhere in this file
+  description='chatbot with search on Health Benefits'  # NOTE(review): assigned but not referenced anywhere in this file
+  with gr.Row():
+    chatbot = gr.Chatbot()
+  # with gr.Row():
+  with gr.Accordion(label='Show AI chain of thought: ', open=False,):
+    ai_cot = gr.HTML(show_label=False)  # receives the HTML string produced by get_cot
+  with gr.Row():
+    message = gr.Textbox(label='Input your question here:',
+                         placeholder='What is the name of the plan described by this summary of benefits?',
+                         lines=1)
+    submit = gr.Button(value='Send',
+                       variant='secondary').style(full_width=False)
+  submit.click(chat,  # chat() returns (history, history, cot), matching the three outputs below in order
+               inputs=[message, history_state],
+               outputs=[chatbot, history_state, ai_cot])
+  gr.Examples(
+            examples=xmpl_list,
+            inputs=message
+        )
+demo.launch()  # runs unconditionally when this module is executed (no __main__ guard)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+sentence-transformers==2.2.2
+openai==0.27.0
+gradio==3.19.1
+langchain==0.0.100
+chromadb==0.3.10