"""Streamlit app: extractive question answering over a Pinecone index.

A sentence-transformer retriever embeds the user's question, the Pinecone
index returns the closest stored contexts, and a question-answering pipeline
extracts an answer span from each context. Results are shown as cards,
best reader score first.
"""
import os

import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# SECURITY: the fallback key below is committed in source. Rotate it and
# supply the real key through the PINECONE_API_KEY environment variable;
# the literal is kept only so existing deployments keep working.
# get a free api key from app.pinecone.io
PINECONE_KEY = os.environ.get(
    "PINECONE_API_KEY", 'b2aaea5e-1395-4270-8c6b-2c89ff1d0a13'
)
PINECONE_ENVIRONMENT = "us-west1-gcp"
INDEX_NAME = "extractive-question-answering"
RETRIEVER_MODEL = "multi-qa-MiniLM-L6-cos-v1"
READER_MODEL = 'jaimin/bert-large-squad'
TOP_K = 3  # number of contexts fetched from the index per question

# Newer Streamlit releases replace st.experimental_singleton with
# st.cache_resource. Prefer the new name when it exists and only touch the
# old attribute when it does not, so the script loads on either version.
_cache_resource = getattr(st, "cache_resource", None) or st.experimental_singleton


def _connect_index():
    """Initialise the Pinecone client and return a handle to the QA index."""
    pinecone.init(api_key=PINECONE_KEY, environment=PINECONE_ENVIRONMENT)
    return pinecone.Index(INDEX_NAME)


@_cache_resource
def init_pinecone():
    """Return the Pinecone index handle, created once and then cached."""
    return _connect_index()


@_cache_resource
def init_models():
    """Load the retriever and reader models once and cache them.

    Returns:
        A ``(retriever, reader)`` tuple: the sentence-transformer used to
        embed questions and the question-answering pipeline used to extract
        answers from retrieved contexts.
    """
    retriever = SentenceTransformer(RETRIEVER_MODEL)
    reader = pipeline(
        tokenizer=READER_MODEL,
        model=READER_MODEL,
        task='question-answering',
    )
    return retriever, reader


st.session_state.index = init_pinecone()
retriever, reader = init_models()


def card(title, context, score):
    """Render one result (title, context and score) as a markdown element.

    NOTE(review): the element is rendered with ``unsafe_allow_html=True`` but
    the template contains no markup - the HTML wrapper may have been lost
    from this copy of the file; confirm against the deployed version.
    ``title`` and ``context`` come from index metadata and are inserted
    unescaped.
    """
    return st.markdown(f"""
{title}
{context} [Score: {score}]
""", unsafe_allow_html=True)


st.title("")
st.write("""
# Extractive Question Answering
Ask me a question!
""")
# NOTE(review): this emits only whitespace; it presumably carried a <style>
# block at some point - confirm before removing.
st.markdown(""" """, unsafe_allow_html=True)


def _query_index(xq):
    """Query the session's index, reconnecting once if the first try fails."""
    try:
        return st.session_state.index.query(
            xq, top_k=TOP_K, include_metadata=True
        )
    except Exception:
        # force reload: build a fresh handle directly instead of going
        # through the cached init_pinecone(), then retry. A second failure
        # propagates to the caller. `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        st.session_state.index = _connect_index()
        return st.session_state.index.query(
            xq, top_k=TOP_K, include_metadata=True
        )


def run_query(query):
    """Answer ``query`` and render one card per retrieved context.

    The question is embedded with the retriever, the ``TOP_K`` nearest
    contexts are fetched from the index, the reader extracts an answer from
    each, and the results are displayed in descending reader-score order.

    Args:
        query: The user's question as plain text.
    """
    xq = retriever.encode([query]).tolist()
    xc = _query_index(xq)

    results = []
    for match in xc['matches']:
        metadata = match["metadata"]
        answer = reader(question=query, context=metadata['context'])
        # Carry the source title/context along with the reader's output.
        answer["title"] = metadata['title']
        answer["context"] = metadata['context']
        results.append(answer)

    results.sort(key=lambda x: x['score'], reverse=True)

    for r in results:
        answer = r["answer"]
        # NOTE(review): as written this replace is a no-op; it looks like a
        # highlight wrapper around the answer was stripped - confirm.
        context = r["context"].replace(answer, f"{answer}")
        title = r["title"].replace("_", " ")
        score = round(r["score"], 4)
        card(title, context, score)


query = st.text_input("Search!", "")

if query:
    run_query(query)