# NOTE(review): the three lines below were scraped Hugging Face Spaces UI text
# ("Spaces: / Sleeping / Sleeping"), not code; kept here as a comment so the
# file remains valid Python.
import gradio as gr
import json

from FlagEmbedding import BGEM3FlagModel

# BGE-M3 multilingual embedding model; use_fp16 trades a little precision
# for a significant speedup when encoding.
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# City charter texts; greet() reads charters['Boston']['current'], so the
# JSON is assumed to map city name -> {'current': <charter text>, ...}.
with open("charters.json", "r") as f:
    charters = json.load(f)
def greet(question):
    """Encode *question* and the Boston charter sentences with BGE-M3.

    Placeholder retrieval pipeline: it computes ColBERT multi-vector
    embeddings for both the question and the charter sentences, prints the
    sentence embeddings, and always returns the string "yes" — no scoring
    or ranking is implemented yet.

    Args:
        question: Free-text question from the Gradio textbox.

    Returns:
        The literal string "yes" (placeholder output).
    """
    sentences = cleanSplit(charters['Boston']['current'])
    # Only ColBERT vectors are requested; dense/sparse outputs are disabled.
    question_embeddings = model.encode(
        question,
        batch_size=12,
        max_length=128,  # questions are short; a small cap speeds up encoding
        return_dense=False, return_sparse=False, return_colbert_vecs=True,
    )
    embeddings_2 = model.encode(
        sentences,
        batch_size=12,
        max_length=512,
        return_dense=False, return_sparse=False, return_colbert_vecs=True,
    )
    print(embeddings_2)
    return "yes"
def cleanSplit(text):
    """Split *text* into sentence chunks, merging short neighbours.

    The text is split on ". " (newlines after a period are normalised
    first). Fragments with no alphanumeric characters are dropped. A
    fragment is merged into the previous chunk when the combined length
    stays within 128 characters; otherwise it starts a new chunk. Every
    stored fragment gets its '. ' separator restored.

    Args:
        text: Raw charter text.

    Returns:
        List of chunk strings, each ending with '. '.
    """
    resp = []
    docs = text.replace(".\n", ". ").split(". ")
    for doc in docs:
        # Skip empty fragments and punctuation-only noise.
        if doc and any(c.isalnum() for c in doc):
            if resp and (len(resp[-1]) + len(doc)) <= 128:
                # Short enough to ride along with the previous chunk.
                resp[-1] += doc.replace("\n", " ") + '. '
            else:
                resp.append(doc + '. ')
    return resp
# Single-textbox UI: the question goes in, greet()'s (placeholder) string
# comes out. launch() at import time is the standard HF Spaces entry point.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()