Tollef Jørgensen committed on
Commit
77c842b
1 Parent(s): 0a6889b

ignore and some updates

Browse files
Files changed (3) hide show
  1. .gitignore +2 -0
  2. __pycache__/app.cpython-39.pyc +0 -0
  3. app.py +52 -24
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pyc
2
+ __pycache__
__pycache__/app.cpython-39.pyc DELETED
Binary file (2.63 kB)
 
app.py CHANGED
@@ -6,6 +6,8 @@ from datasets import load_dataset
6
  from sentence_transformers import SentenceTransformer
7
 
8
  idx = 0
 
 
9
 
10
  dataset = load_dataset("tollefj/rettsavgjoerelser_100samples_embeddings")
11
  model = SentenceTransformer("NbAiLab/nb-sbert-base")
@@ -27,18 +29,13 @@ def get_doc_embeddings(doc):
27
  return np.array(doc.embedding.tolist(), dtype="float32")
28
 
29
 
30
- def faiss_search(doc_url, query_str, K=5):
31
  global idx
 
 
32
  # find idx from url:
33
- doc_idx = df[df.url == doc_url].index[0]
34
- idx = int(doc_idx)
35
- newdoc = build_doc_frame(df, idx)
36
- embeddings = get_doc_embeddings(newdoc)
37
-
38
- faiss.normalize_L2(embeddings)
39
- index = faiss.IndexFlatIP(768)
40
- index.add(embeddings)
41
-
42
  target_emb = model.encode([query_str])
43
  target_emb = np.array([target_emb.reshape(-1)])
44
  faiss.normalize_L2(target_emb)
@@ -65,25 +62,56 @@ def faiss_search(doc_url, query_str, K=5):
65
  dropdown_opts = [doc.url for idx, doc in df.iterrows()]
66
 
67
  with gr.Blocks() as demo:
68
- gr.Label("Lovdata rettsavgjørelser - semantisk søk")
69
-
70
- case_dropdown = gr.Dropdown(label="Velg en rettsavgjørelse", choices=dropdown_opts, default=dropdown_opts[0])
71
- # when case_dropdown changes, update the summary dropdown:
72
- # idx_label = gr.Label(f"Current index: {idx}")
73
-
74
- query = gr.Textbox(
75
- label="Søk etter setninger",
76
- lines=1,
77
- placeholder="Kollisjon mellom to kjøretøy.",
78
  )
79
- k_slider = gr.Slider(minimum=1, maximum=10, label="Antall treff", value=5, step=1)
80
 
81
- search_btn = gr.Button("Søk")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  output = gr.Textbox(label="Resultater", lines=10)
84
 
85
- # from the selected URL, find the index in the df:
86
- search_btn.click(faiss_search, inputs=[case_dropdown, query, k_slider], outputs=[output])
 
 
 
 
 
87
 
88
  # clear_btn.click(None, inputs=[None, None], outputs=None)
89
  # search_btn.click(faiss_search, inputs=[None, None, None], outputs=["text"])
 
6
  from sentence_transformers import SentenceTransformer
7
 
8
  idx = 0
9
+ index = None
10
+ newdoc = None
11
 
12
  dataset = load_dataset("tollefj/rettsavgjoerelser_100samples_embeddings")
13
  model = SentenceTransformer("NbAiLab/nb-sbert-base")
 
29
  return np.array(doc.embedding.tolist(), dtype="float32")
30
 
31
 
32
+ def faiss_search(query_str, K=5):
33
  global idx
34
+ global index
35
+ global newdoc
36
  # find idx from url:
37
+ # doc_idx = df[df.url == doc_url].index[0]
38
+ # idx = int(doc_idx)
 
 
 
 
 
 
 
39
  target_emb = model.encode([query_str])
40
  target_emb = np.array([target_emb.reshape(-1)])
41
  faiss.normalize_L2(target_emb)
 
62
  dropdown_opts = [doc.url for idx, doc in df.iterrows()]
63
 
64
  with gr.Blocks() as demo:
65
+ gr.HTML(
66
+ """
67
+ <h1>Lovdata rettsavgjørelser - semantisk søk</h1>
68
+ """
 
 
 
 
 
 
69
  )
 
70
 
71
+ def on_selection_change(selected_case):
72
+ global idx
73
+ global index
74
+ global newdoc
75
+ idx = df[df.url == selected_case].index[0]
76
+ print("Selection changed!")
77
+ print(selected_case)
78
+
79
+ newdoc = build_doc_frame(df, idx)
80
+ embeddings = get_doc_embeddings(newdoc)
81
+ faiss.normalize_L2(embeddings)
82
+ index = faiss.IndexFlatIP(768)
83
+ index.add(embeddings)
84
+
85
+ summary = df.iloc[idx].summary.tolist()
86
+ # make a nice html-formatted ul-li list:
87
+ summary_html = "<ul>" + "".join([f"<li>{sent}</li>" for sent in summary]) + "</ul>"
88
+ # summary_dropdown.update(choices=summary, label="Velg fra oppsummeringene")
89
+ url_html = f"<a href='{selected_case}' target='_blank'>{selected_case}</a>"
90
+ return summary_html, url_html
91
+
92
+ with gr.Row():
93
+ with gr.Column():
94
+ case_dropdown = gr.Dropdown(label="Velg en rettsavgjørelse", choices=dropdown_opts)
95
+ summary_html = gr.HTML(label="Predefinert oppsummering", placeholder="<p>Velg en sak først<p>")
96
+ case_url = gr.HTML(label="URL til rettsavgjørelse", placeholder="https://lovdata.no/...")
97
+ with gr.Column():
98
+ query = gr.Textbox(
99
+ label="Søk etter setninger",
100
+ lines=1,
101
+ placeholder="Kollisjon mellom to kjøretøy.",
102
+ )
103
+ k_slider = gr.Slider(minimum=1, maximum=10, label="Antall treff", value=5, step=1)
104
+ search_btn = gr.Button("Søk")
105
 
106
  output = gr.Textbox(label="Resultater", lines=10)
107
 
108
+ case_dropdown.change(
109
+ on_selection_change,
110
+ inputs=[case_dropdown],
111
+ outputs=[summary_html, case_url],
112
+ )
113
+
114
+ search_btn.click(faiss_search, inputs=[query, k_slider], outputs=[output])
115
 
116
  # clear_btn.click(None, inputs=[None, None], outputs=None)
117
  # search_btn.click(faiss_search, inputs=[None, None, None], outputs=["text"])