"""Gradio demo: dense retrieval of Belgian statutory articles for French legal questions."""
import gradio as gr
import pandas as pd
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, models, util

# Number of articles shown to the user. Must match the number of output
# components handed to `gr.Interface` further down in this file.
NUM_RESULTS = 5

# Question encoder: a transformer checkpoint followed by CLS-token pooling.
q_encoder = SentenceTransformer(modules=[
    models.Transformer(model_name_or_path="checkpoints/q_encoder", max_seq_length=512),
    models.Pooling(word_embedding_dimension=768, pooling_mode='cls'),
])

# Pre-computed article embeddings, mapped to CPU so no GPU is required.
# NOTE(review): `torch.load` unpickles the file; only load trusted checkpoints.
doc_embeddings = torch.load('checkpoints/doc_embeddings.pt', map_location=torch.device('cpu'))

# Article corpus. `search` looks rows up by the `corpus_id` returned from
# `semantic_search`, so row i is assumed to correspond to embedding i.
docs = pd.DataFrame(load_dataset("antoiloui/bsard", data_files="articles_fr.csv")['train'])


def _format_hit(corpus_id):
    """Render one retrieved article as its text followed by a citation line."""
    text = docs.loc[corpus_id, 'article']
    number = docs.loc[corpus_id, 'article_no']
    code = docs.loc[corpus_id, 'code']
    return text + '\n\n' + f"- Art. {number}, {code}"


def search(question):
    """Return the articles most similar to *question*, best match first.

    Args:
        question: The user's question as a string (the demo expects French).

    Returns:
        A list of up to ``NUM_RESULTS`` formatted strings, one per article,
        ordered by decreasing cosine similarity. A list (not a set) is
        returned on purpose: a set would discard the ranking and merge
        duplicate entries, leaving the output boxes in arbitrary order.
    """
    q_emb = q_encoder.encode(question, convert_to_tensor=True)
    # Only the best NUM_RESULTS hits are displayed, so do not retrieve more.
    # A single query is passed, hence the `[0]` to take its hit list.
    hits = util.semantic_search(
        q_emb, doc_embeddings, top_k=NUM_RESULTS, score_function=util.cos_sim
    )[0]
    return [_format_hit(hit['corpus_id']) for hit in hits]


title = "Belgian Legislation Search"
description = "A biencoder model was trained to retrieve relevant statutory articles to legal issues. Ask it a question in French!"
# Markdown shown beneath the demo. A Markdown table needs every row on its
# own line (and a blank line before the table) to render, so the rows are
# kept one per line here.
article = """
The model will return the most semantically relevant laws from a corpus of 22,633 statutory articles collected from 32 Belgian codes:

| Authority | Code | #Articles |
|-----------|-----------------------------------------------------|-----------|
| Federal | Judicial Code | 2285 |
| | Code of Economic Law | 2032 |
| | Civil Code | 1961 |
| | Code of Workplace Welfare | 1287 |
| | Code of Companies and Associations | 1194 |
| | Code of Local Democracy and Decentralization | 1159 |
| | Navigation Code | 977 |
| | Code of Criminal Instruction | 719 |
| | Penal Code | 689 |
| | Social Penal Code | 307 |
| | Forestry Code | 261 |
| | Railway Code | 260 |
| | Electoral Code | 218 |
| | The Constitution | 208 |
| | Code of Various Rights and Taxes | 191 |
| | Code of Private International Law | 135 |
| | Consular Code | 100 |
| | Rural Code | 87 |
| | Military Penal Code | 66 |
| | Code of Belgian Nationality | 31 |
| Regional | Walloon Code of Social Action and Health | 3650 |
| | Walloon Code of the Environment | 1270 |
| | Walloon Code of Territorial Development | 796 |
| | Walloon Public Service Code | 597 |
| | Walloon Code of Agriculture | 461 |
| | Brussels Spatial Planning Code | 401 |
| | Walloon Code of Basic and Secondary Education | 310 |
| | Walloon Code of Sustainable Housing | 286 |
| | Brussels Housing Code | 279 |
| | Brussels Code of Air, Climate and Energy Management | 208 |
| | Walloon Animal Welfare Code | 108 |
| | Brussels Municipal Electoral Code | 100 |
| Total | | 22633 |
"""

# Sample questions (in French) offered as one-click examples in the UI.
examples = [
    "Qu'est-ce que je risque si je viole le secret professionnel ?",
    "Mon employeur peut-il me licencier alors que je suis malade ?",
    "Mon voisin fait beaucoup de bruit, que faire ?",
]

# One text input, five text boxes for the retrieved articles; flagging is
# disabled. The server is started as soon as this module is executed.
gr.Interface(
    fn=search,
    inputs=['text'],
    outputs=['textbox']*5,
    allow_flagging="never",
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()