"""Gradio demo that encodes a question and a city charter with BGE-M3."""

import json

import gradio as gr
from FlagEmbedding import BGEM3FlagModel

# Loaded once at import time so every request reuses the same model instance.
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# JSON is UTF-8 by specification; be explicit instead of relying on the
# platform's default encoding.
with open("charters.json", "r", encoding="utf-8") as f:
    charters = json.load(f)


def greet(question):
    """Encode *question* and the current Boston charter, then return "yes".

    The charter text at ``charters['Boston']['current']`` is split into
    sentence chunks with :func:`cleanSplit`. Both the question and the chunks
    are encoded with ``return_colbert_vecs=True`` (dense and sparse outputs
    disabled). The encoding result for the chunks is printed to stdout.

    Args:
        question: Text entered in the Gradio input box.

    Returns:
        The literal string ``"yes"``.
    """
    sentences = cleanSplit(charters['Boston']['current'])

    # NOTE(review): the question encoding is computed but not used yet, and
    # the charter is re-encoded on every request -- presumably a scoring step
    # between the two is still to be added; confirm before removing either.
    question_embeddings = model.encode(
        question,
        batch_size=12,
        # If you don't need such a long length, you can set a smaller value
        # to speed up the encoding process.
        max_length=128,
        return_dense=False,
        return_sparse=False,
        return_colbert_vecs=True,
    )
    embeddings_2 = model.encode(
        sentences,
        batch_size=12,
        max_length=512,
        return_dense=False,
        return_sparse=False,
        return_colbert_vecs=True,
    )
    print(embeddings_2)
    return "yes"


def cleanSplit(text, max_chars=128):
    """Split *text* into sentence chunks, merging short neighbours.

    The text is split on ``". "`` (a period followed by a newline counts as a
    sentence end too). Fragments containing no alphanumeric character are
    dropped. A fragment is appended to the previous chunk when the combined
    length stays within *max_chars*; otherwise it starts a new chunk.

    Args:
        text: The document to split.
        max_chars: Upper bound on ``len(previous_chunk) + len(fragment)`` for
            the fragment to be merged into the previous chunk.

    Returns:
        A list of chunks. Every sentence in a chunk ends with ``". "`` and
        contains no newline characters.
    """
    resp = []
    docs = text.replace(".\n", ". ").split(". ")
    for doc in docs:
        if not any(ch.isalnum() for ch in doc):
            continue

        # Flatten newlines on every path, not only when merging, so all
        # chunks are cleaned the same way.
        doc = doc.replace("\n", " ")

        # The final fragment keeps its own period when the text ends with
        # "." and nothing after it; drop it so we don't emit ".. ".
        if doc.endswith("."):
            doc = doc[:-1]

        if resp and len(resp[-1]) + len(doc) <= max_chars:
            resp[-1] += doc + '. '
        else:
            resp.append(doc + '. ')
    return resp


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()