antoinelouis committed on
Commit
7473ba2
1 Parent(s): 9158c03

Add app files

Browse files
Files changed (4) hide show
  1. README.md +8 -5
  2. app.py +26 -0
  3. checkpoints/doc_embeddings.pt +3 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,13 +1,16 @@
1
  ---
2
  title: Legislation Search
3
- emoji: 💻
4
- colorFrom: gray
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 3.0.24
8
  app_file: app.py
9
  pinned: false
10
  license: cc-by-nc-sa-4.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
  title: Legislation Search
3
+ emoji: 🇧🇪
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
  license: cc-by-nc-sa-4.0
10
  ---
11
 
12
+ This is a demo for the paper: "A Statutory Article Retrieval Dataset in French" (ACL 2022)
13
+
14
+ - Paper: https://aclanthology.org/2022.acl-long.468/
15
+ - Code: https://github.com/maastrichtlawtech/bsard
16
+ - Data: https://huggingface.co/datasets/antoiloui/bsard
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ import torch
5
+ from datasets import load_dataset
6
+ from sentence_transformers import SentenceTransformer, util
7
+
8
+
9
# Question encoder: a SentenceTransformer loaded from the local
# "checkpoints/q_encoder" directory.
q_encoder = SentenceTransformer("checkpoints/q_encoder")

# Pre-computed document embeddings searched by `search`.
# map_location="cpu" lets the file load on a CPU-only host even if the
# tensor was saved from a GPU process.
doc_embeddings = torch.load('checkpoints/doc_embeddings.pt', map_location="cpu")

# Article table built from the "train" split of articles_fr.csv.
# NOTE(review): `search` uses each hit's corpus_id as a row label here, so the
# rows are assumed to be in the same order as `doc_embeddings` -- confirm.
docs = pd.DataFrame(load_dataset("antoiloui/bsard", data_files="articles_fr.csv")['train'])
12
+
13
def search(query):
    """Retrieve the articles most similar to a question.

    The question is encoded with ``q_encoder`` and ranked by cosine
    similarity against the pre-computed ``doc_embeddings``.

    Args:
        query: The question, as free text.

    Returns:
        A ``(results, references)`` pair of strings, one value per output
        component of the interface: the texts of the best-matching articles
        separated by blank lines, and the matching
        ``"Art. <article_no>, <code>"`` references, one per line and in the
        same order.
    """
    top_k = 5  # number of articles shown to the user

    q_emb = q_encoder.encode(query, convert_to_tensor=True)
    # Request only the hits that are displayed rather than ranking 100 and
    # discarding all but the first five.
    hits = util.semantic_search(
        q_emb, doc_embeddings, top_k=top_k, score_function=util.cos_sim
    )[0]

    articles = []
    references = []
    for hit in hits:
        idx = hit['corpus_id']  # used as a row label of `docs`
        articles.append(str(docs.loc[idx, 'article']))
        references.append(
            f"Art. {docs.loc[idx, 'article_no']}, {docs.loc[idx, 'code']}"
        )

    # Two return values, matching the two output components declared on the
    # interface; parallel lists also keep hits whose texts are identical.
    return "\n\n".join(articles), "\n".join(references)
17
+
18
# Assemble the demo UI piece by piece, then start serving it.
question_box = gr.Textbox(label="Question", placeholder="")
output_boxes = [
    gr.Textbox(lines=5, label="Result"),
    gr.Textbox(label="Reference"),
]
example_questions = [
    "Qu'est-ce que je risque si je viole le secret professionnel ?",
    "Mon employeur peut-il me licencier alors que je suis malade ?",
]

demo = gr.Interface(
    fn=search,
    inputs=question_box,
    outputs=output_boxes,
    title="Legislation Search 🇧🇪",
    description="",
    flagging_options=["👍", "👎"],
    examples=example_questions,
)
demo.launch(share=False, enable_queue=False)
checkpoints/doc_embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc0b8cb264f836bfa266ea91736b71acbdc16d4306708aa958392d283b55004
3
+ size 69529323
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas==1.3.5
2
+ sentence-transformers==2.1.0
3
+ torch==1.10.1
4
+ datasets==1.18.3