charter_search / app.py
Tonyivan's picture
Update app.py
d827046 verified
import gradio as gr
import json
from FlagEmbedding import BGEM3FlagModel
# Load the BGE-M3 model once at import time so every request reuses the same
# instance; half-precision is requested through use_fp16.
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# Charter texts used by greet(), which reads charters['Boston']['current'].
# The encoding is given explicitly so decoding does not depend on the host's
# locale default, and json.load parses straight from the file object.
with open("charters.json", "r", encoding="utf-8") as f:
    charters = json.load(f)
def greet(question):
    """Encode *question* and the Boston charter sentences, then reply "yes".

    The current Boston charter text is split with ``cleanSplit`` and both the
    question and the resulting sentences are passed to ``model.encode``.  The
    sentence encoding is printed; the question encoding is computed but not
    used further yet.

    Args:
        question: The text entered in the Gradio input box.

    Returns:
        The constant string ``"yes"``.
    """
    # Both encode calls ask for the ColBERT vectors only: the dense and sparse
    # outputs are switched off.
    colbert_only = {
        "return_dense": False,
        "return_sparse": False,
        "return_colbert_vecs": True,
    }

    charter_sentences = cleanSplit(charters['Boston']['current'])

    # A smaller max_length can be used to speed up encoding when inputs are
    # short, which is why the question uses 128 rather than 512.
    query_vecs = model.encode(
        question,
        batch_size=12,
        max_length=128,
        **colbert_only,
    )
    charter_vecs = model.encode(
        charter_sentences,
        batch_size=12,
        max_length=512,
        **colbert_only,
    )

    print(charter_vecs)
    return "yes"
def cleanSplit(text, *, max_chars=128):
    """Split *text* into sentence chunks of roughly ``max_chars`` characters.

    The text is cut at ``". "``; a period directly followed by a newline is
    treated as a sentence break as well.  Fragments that contain no
    alphanumeric character are dropped.  Each remaining sentence is appended
    to the previous chunk while that chunk's current length plus the
    sentence's length stays within ``max_chars``; otherwise it opens a new
    chunk.

    Args:
        text: The document to split.
        max_chars: Character budget used when deciding whether to merge a
            sentence into the previous chunk.  A chunk may exceed it by the
            trailing ``". "`` terminator, and a single sentence longer than
            the budget is kept whole rather than cut.

    Returns:
        A list of chunks.  Newlines inside sentences are replaced by spaces
        and every chunk ends with ``". "``.
    """
    chunks = []
    for fragment in text.replace(".\n", ". ").split(". "):
        # Skip empty pieces and pieces made only of punctuation/whitespace.
        if not any(ch.isalnum() for ch in fragment):
            continue

        # Normalise newlines for every sentence, including the one that
        # starts a chunk, so all chunks are formatted the same way.
        sentence = fragment.replace("\n", " ")

        # split(". ") leaves the period on a fragment that ends the text;
        # only add the missing part of the terminator to avoid "..".
        sentence += " " if sentence.endswith(".") else ". "

        # The merge test uses the bare fragment length (terminator excluded).
        if chunks and len(chunks[-1]) + len(fragment) <= max_chars:
            chunks[-1] += sentence
        else:
            chunks.append(sentence)
    return chunks
# Gradio UI: one text input is passed to greet() and its return value is
# shown as text.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)

# Start the web app.
iface.launch()