Ahsen Khaliq committed on
Commit 04a7a46
Parent(s): 3c1ad6f

Update app.py

Files changed (1)
  1. app.py +47 -6
app.py CHANGED
@@ -1,10 +1,51 @@
+import torch
+from scipy.spatial.distance import cosine
+from transformers import AutoModel, AutoTokenizer
 import gradio as gr
+
+# Import our models. The package will take care of downloading the models automatically
+tokenizer = AutoTokenizer.from_pretrained("princeton-nlp/sup-simcse-bert-base-uncased")
+model = AutoModel.from_pretrained("princeton-nlp/sup-simcse-bert-base-uncased")
+
+def simcse(text1, text2, text3):
+    # Tokenize input texts
+    texts = [
+        text1,
+        text2,
+        text3
+    ]
+    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+
+    # Get the embeddings
+    with torch.no_grad():
+        embeddings = model(**inputs, output_hidden_states=True, return_dict=True).pooler_output
+
+    # Calculate cosine similarities
+    # Cosine similarities are in [-1, 1]. Higher means more similar
+    cosine_sim_0_1 = 1 - cosine(embeddings[0], embeddings[1])
+    cosine_sim_0_2 = 1 - cosine(embeddings[0], embeddings[2])
+    return {"cosine similarity": cosine_sim_0_1}, {"cosine similarity": cosine_sim_0_2}
+
+
+inputs = [
+    gr.inputs.Textbox(lines=5, label="Input Text One"),
+    gr.inputs.Textbox(lines=5, label="Input Text Two"),
+    gr.inputs.Textbox(lines=5, label="Input Text Three")
+]
+
+outputs = [
+    gr.outputs.Label(type="confidences", label="Cosine similarity between text one and two"),
+    gr.outputs.Label(type="confidences", label="Cosine similarity between text one and three")
+]
+
+
 title = "SimCSE"
-description = "Gradio Demo for SimCSE, a transformer model trained using Ben Wang's Mesh Transformer JAX. 'GPT-J' refers to the class of model, while '6B' represents the number of trainable parameters. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
-article = "<p style='text-align: center'><a href='https://github.com/kingoflolz/mesh-transformer-jax' target='_blank'>GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model</a></p>"
+description = "demo for Princeton-NLP SimCSE. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.08821'>SimCSE: Simple Contrastive Learning of Sentence Embeddings</a> | <a href='https://github.com/princeton-nlp/SimCSE'>Github Repo</a></p>"
 examples = [
-    ['The tower is 324 metres (1,063 ft) tall,'],
-    ["The Moon's orbit around Earth has"],
-    ["The smooth Borealis basin in the Northern Hemisphere covers 40%"]
+    ["There's a kid on a skateboard.",
+     "A kid is skateboarding.",
+     "A kid is inside the house."]
 ]
-gr.Interface.load("huggingface/princeton-nlp/sup-simcse-bert-base-uncased", inputs=gr.inputs.Textbox(lines=5, label="Input Text"), title=title, description=description, article=article, examples=examples, enable_queue=True).launch()
+
+gr.Interface(simcse, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()
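For reference, a minimal standalone sketch of what the new scoring path does, without the Gradio layer. It uses the same checkpoint and the example sentences from the diff's examples list; the script framing and print labels are illustrative additions, not part of the commit:

# Sketch: exercise the new simcse() scoring logic directly.
# Same model and preprocessing as the committed app.py.
import torch
from scipy.spatial.distance import cosine
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("princeton-nlp/sup-simcse-bert-base-uncased")
model = AutoModel.from_pretrained("princeton-nlp/sup-simcse-bert-base-uncased")

texts = ["There's a kid on a skateboard.",
         "A kid is skateboarding.",
         "A kid is inside the house."]
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    embeddings = model(**inputs, output_hidden_states=True, return_dict=True).pooler_output

# 1 - cosine distance = cosine similarity, in [-1, 1]; higher means more similar,
# so the paraphrase pair should score above the unrelated pair.
print("text1 vs text2:", 1 - cosine(embeddings[0], embeddings[1]))
print("text1 vs text3:", 1 - cosine(embeddings[0], embeddings[2]))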