# emmetmayer's picture
# Update app.py
# 90c428c
# raw
# history blame contribute delete
# No virus
# 1.5 kB
import streamlit as st
import json
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from sentence_transformers import SentenceTransformer, util
from io import StringIO
@st.cache
def vectorizeSentences(model, sentences):
    """Score every sentence against the first one by cosine similarity.

    Args:
        model: Object exposing ``encode(sentences)``.
        sentences: Sequence handed to ``model.encode``; the first entry
            serves as the reference for the comparison.

    Returns:
        Row 0 of ``util.cos_sim(first_embedding, all_embeddings)``.

    NOTE(review): despite the name, the return value holds similarity
    scores, not the embeddings themselves.
    """
    encoded = model.encode(sentences)
    reference = encoded[0]
    similarity = util.cos_sim(reference, encoded)
    return similarity[0]
@st.cache
def loadContext(model, context):
    """Parse a JSON context stream and score its top-level keys.

    Args:
        model: Passed through unchanged to ``vectorizeSentences``.
        context: Readable file-like object holding a JSON object. It is
            read from its current position by ``json.load``.

    Returns:
        Tuple ``(data, embeddings)``: the parsed JSON, and the result of
        ``vectorizeSentences`` applied to the list of its keys.

    Raises:
        json.JSONDecodeError: If the stream does not contain valid JSON.
        AttributeError: If the parsed JSON is not an object (no ``keys``).
    """
    # Read from the argument rather than the module-level ``contextUpload``:
    # the global is only assigned later in the script, and relying on it
    # made the ``context`` parameter dead.
    data = json.load(context)
    embeddings = vectorizeSentences(model, list(data.keys()))
    return (data, embeddings)
@st.cache
def question(model, question, data, embeddings):
    """Load the uploaded context and score its keys.

    Returns:
        Tuple of the JSON parsed from the module-level ``contextUpload``
        and the ``vectorizeSentences`` result for its keys.

    NOTE(review): the ``question``, ``data`` and ``embeddings`` arguments
    are never read; the body matches ``loadContext`` line for line and
    looks like an unfinished placeholder. Confirm the intended behaviour
    before relying on this function.
    """
    loaded = json.load(contextUpload)
    key_scores = vectorizeSentences(model, list(loaded.keys()))
    return (loaded, key_scores)
# allow_output_mutation=True stops st.cache from hashing the returned model
# to detect mutation; hashing a large model object is slow and can fail on
# members that Streamlit cannot hash.
@st.cache(allow_output_mutation=True)
def loadSentenceModel():
    """Return the multilingual MiniLM paraphrase SentenceTransformer model."""
    return SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
# allow_output_mutation=True stops st.cache from hashing the returned
# pipeline to detect mutation; pipeline objects hold a tokenizer that
# Streamlit's hasher may reject.
@st.cache(allow_output_mutation=True)
def loadQAModel():
    """Return a question-answering pipeline using deepset/roberta-base-squad2."""
    return pipeline(
        "question-answering",
        model="deepset/roberta-base-squad2",
        tokenizer="deepset/roberta-base-squad2",
    )
# Call the loaders: binding the bare function objects would hand a function,
# not a model, to anything that later uses these names.
sentenceModel = loadSentenceModel()
questionAnsweringModel = loadQAModel()

st.header("Large Context Question & Answering")
st.info("Upload a JSON context file")
contextUpload = st.file_uploader("Upload a.json context file", type=["json"])

# The uploader yields None until the user picks a file, so the JSON preview
# must live inside the guard instead of running unconditionally.
if contextUpload is not None:
    st.json(json.load(contextUpload))
    # NOTE(review): before re-enabling the lines below, the upload stream
    # likely needs rewinding (the preview above has already consumed it),
    # and assigning to ``question`` would rebind the function of that name.
    # embeddings = loadContext(sentenceModel, contextUpload)
    # question = st.text_input("Enter your question", value="")
    # if question != "":
    st.write("question")