File size: 4,398 Bytes
7473ba2 e29da5d 7473ba2 02ffc6e 7473ba2 02ffc6e 7473ba2 02ffc6e 7473ba2 02ffc6e 7473ba2 9911f95 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import torch
import pandas as pd
import gradio as gr
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util, models
# Query encoder: a transformer checkpoint followed by CLS pooling.
_q_transformer = models.Transformer(
    model_name_or_path="checkpoints/q_encoder",
    max_seq_length=512,
)
_q_pooling = models.Pooling(word_embedding_dimension=768, pooling_mode='cls')
q_encoder = SentenceTransformer(modules=[_q_transformer, _q_pooling])

# Document embeddings loaded from disk and mapped onto the CPU.
# NOTE(review): presumably row i of this tensor corresponds to row i of
# `docs` below (search() indexes `docs` by the hit's corpus_id) - confirm.
doc_embeddings = torch.load(
    'checkpoints/doc_embeddings.pt',
    map_location=torch.device('cpu'),
)

# French article corpus of the BSARD dataset as a DataFrame.
_bsard_articles = load_dataset("antoiloui/bsard", data_files="articles_fr.csv")
docs = pd.DataFrame(_bsard_articles['train'])
def search(question):
    """Return the five statutory articles most relevant to a question.

    The question is embedded with the module-level ``q_encoder`` and compared
    against ``doc_embeddings`` using cosine similarity.

    Args:
        question: Question text entered in the UI (the interface description
            asks for French).

    Returns:
        A list of up to five strings ordered from best to worst match. Each
        string is the article text, a blank line, and a reference line of the
        form ``- Art. <article_no>, <code>``.
    """
    # Must match the number of output textboxes declared on the interface.
    num_results = 5

    q_emb = q_encoder.encode(question, convert_to_tensor=True)
    # Only the hits that are displayed are requested; semantic_search returns
    # them sorted by decreasing score.
    hits = util.semantic_search(
        q_emb,
        doc_embeddings,
        top_k=num_results,
        score_function=util.cos_sim,
    )[0]

    # A list (not a set) keeps the ranking order and never collapses two
    # identical strings, so the UI always receives one value per hit.
    results = []
    for hit in hits:
        idx = hit['corpus_id']
        reference = f"- Art. {docs.loc[idx, 'article_no']}, {docs.loc[idx, 'code']}"
        results.append(docs.loc[idx, 'article'] + '\n\n' + reference)
    return results
# Heading and short blurb passed to gr.Interface as `title` / `description`.
title = "Belgian Legislation Search"
description = (
    "A biencoder model was trained to retrieve relevant statutory articles "
    "to legal issues. Ask it a question in French!"
)
# Markdown text passed to gr.Interface as `article`: one intro sentence
# followed by a table listing every code in the corpus with its article count.
# The per-code counts add up to the 22,633 total quoted in the intro sentence
# (20 federal + 12 regional codes = 32). Keep the table and the intro in sync
# when editing either.
article = """
The model will return the most semantically relevant laws from a corpus of 22,633 statutory articles collected from 32 Belgian codes:
| Authority | Code | #Articles |
|-----------|-----------------------------------------------------|-----------|
| Federal | Judicial Code | 2285 |
| | Code of Economic Law | 2032 |
| | Civil Code | 1961 |
| | Code of Workplace Welfare | 1287 |
| | Code of Companies and Associations | 1194 |
| | Code of Local Democracy and Decentralization | 1159 |
| | Navigation Code | 977 |
| | Code of Criminal Instruction | 719 |
| | Penal Code | 689 |
| | Social Penal Code | 307 |
| | Forestry Code | 261 |
| | Railway Code | 260 |
| | Electoral Code | 218 |
| | The Constitution | 208 |
| | Code of Various Rights and Taxes | 191 |
| | Code of Private International Law | 135 |
| | Consular Code | 100 |
| | Rural Code | 87 |
| | Military Penal Code | 66 |
| | Code of Belgian Nationality | 31 |
| Regional | Walloon Code of Social Action and Health | 3650 |
| | Walloon Code of the Environment | 1270 |
| | Walloon Code of Territorial Development | 796 |
| | Walloon Public Service Code | 597 |
| | Walloon Code of Agriculture | 461 |
| | Brussels Spatial Planning Code | 401 |
| | Walloon Code of Basic and Secondary Education | 310 |
| | Walloon Code of Sustainable Housing | 286 |
| | Brussels Housing Code | 279 |
| | Brussels Code of Air, Climate and Energy Management | 208 |
| | Walloon Animal Welfare Code | 108 |
| | Brussels Municipal Electoral Code | 100 |
| Total | | 22633 |
"""
# Sample questions passed to gr.Interface as `examples`.
examples = [
    "Qu'est-ce que je risque si je viole le secret professionnel ?",
    "Mon employeur peut-il me licencier alors que je suis malade ?",
    "Mon voisin fait beaucoup de bruit, que faire ?",
]

# One text input and five text outputs, one per result produced by search().
demo = gr.Interface(
    fn=search,
    inputs=['text'],
    outputs=['textbox'] * 5,
    allow_flagging="never",
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch()
|