File size: 3,306 Bytes
083997e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import os
import torch
import pickle
import gzip

from torch.nn.functional import cosine_similarity
from model import create_semantic_ranking_model
from timeit import default_timer as timer
from typing import Tuple, Dict

### Load example texts ###
# Each file holds one entry per line. predict() zips the two lists, so line i
# of answers_texts.txt is treated as the answer to line i of
# questions_texts.txt; blank lines are therefore kept (as "") to preserve the
# alignment.
# The encoding is pinned so decoding does not depend on the host's locale.
# NOTE(review): assumes both files were written as UTF-8 - confirm.
with open("questions_texts.txt", "r", encoding="utf-8") as file:
  questions_texts = [line.strip() for line in file]

with open("answers_texts.txt", "r", encoding="utf-8") as file:
  answers_texts = [line.strip() for line in file]

### Model and transforms preparation ###
# Build the (model, tokenizer) pair from the project-local factory
model, tokenizer = create_semantic_ranking_model()

# Read the checkpoint onto the CPU, then copy its weights into the model
cpu_device = torch.device("cpu")
saved_weights = torch.load(f="all-MiniLM-L6-v2.pth", map_location=cpu_device)
model.load_state_dict(saved_weights)


def _read_gzip_pickle(path):
  """Return the object unpickled from the gzip-compressed file at `path`."""
  # NOTE(review): unpickling can execute arbitrary code; only use this on
  # files that ship with the app, never on user-supplied data.
  with gzip.open(path, 'rb') as f:
    return pickle.load(f)


# Precomputed response embeddings, and the response list that predict()
# indexes with positions taken from the similarity scores
response_embeddings = _read_gzip_pickle('response_embeddings.pkl.gz')
response_list = _read_gzip_pickle('response_list.pkl.gz')

### Predict function ###
def predict(text, top_k: int = 5) -> Tuple[Dict, float]:
  """Rank the stored responses against `text` and time the lookup.

  Args:
    text: Query string. `None` is treated as an empty string.
    top_k: Maximum number of responses to return (default 5). It is clamped
      to the number of stored responses so a small response set does not
      make `torch.topk` raise.

  Returns:
    A tuple `(result, pred_time)` where `result` is a dict with the keys
    "Top Responses" (response texts, most similar first) and
    "Actual Response" (the answer paired with an example question whose
    stripped text equals the stripped query, otherwise None), and
    `pred_time` is the elapsed time in seconds rounded to 4 decimals.
  """
  # Start a timer
  start_time = timer()

  # A cleared textbox or a direct API call may hand over None
  query = "" if text is None else text

  # Set the model to eval
  model.eval()

  # Set up the inputs
  tokenized_inputs = tokenizer(query, return_tensors="pt", max_length=128, truncation=True, padding="max_length")

  # Get input_embeddings
  # NOTE(review): assumes the model returns a (1, dim) tensor for a single
  # query and response_embeddings is (num_responses, dim) - confirm.
  with torch.inference_mode():
    input_embeddings = model(**tokenized_inputs)

  # Compute similarity scores: one row per query, one column per response
  similarity_scores = cosine_similarity(input_embeddings.unsqueeze(1), response_embeddings.unsqueeze(0), dim=2)

  # Never ask topk for more entries than exist
  k = max(0, min(int(top_k), similarity_scores.shape[-1]))

  # Take the single query's row explicitly; a bare squeeze() would collapse
  # to a 0-d tensor (not iterable) when k == 1
  top_responses_indices = torch.topk(similarity_scores, k=k, dim=1).indices[0].tolist()

  # Retrieve the actual response texts
  top_responses = [response_list[idx] for idx in top_responses_indices]

  # Get actual response: first example question that matches the query
  stripped_query = query.strip()
  actual_response = next(
      (answer for question, answer in zip(questions_texts, answers_texts)
       if question.strip() == stripped_query),
      None,
  )

  # Calculate pred time
  end_time = timer()
  pred_time = round(end_time - start_time, 4)

  # Return pred dict and pred time
  return {"Top Responses": top_responses, "Actual Response": actual_response}, pred_time

### Gradio app ###
# Create title, description and article
title = "Semantic Ranking with MiniLM-L6-v2"
description = "[A MiniLM-L6-H384-uncased MiniLM based model](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) sentence embedding model trained to rank results from [HuggingFace 🤗 Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3). [Source Code Found Here](https://colab.research.google.com/drive/1o5a9zH1TxzaxLKV5AFUhZE8L8yMnO9Jw?usp=sharing)"
article = "Built with [Gradio](https://github.com/gradio-app/gradio) and [PyTorch](https://pytorch.org/). [Source Code Found Here](https://colab.research.google.com/drive/1o5a9zH1TxzaxLKV5AFUhZE8L8yMnO9Jw?usp=sharing)"


def _predict_for_ui(text):
  """Adapt predict() to the three output components declared below.

  predict() returns `(result_dict, pred_time)`; the interface expects one
  value per output component, so the dict is unpacked into its
  "Top Responses" and "Actual Response" entries.
  """
  result, pred_time = predict(text)
  return result["Top Responses"], result["Actual Response"], pred_time


# Create the Gradio demo
demo = gr.Interface(fn=_predict_for_ui,
    inputs=gr.Textbox(lines=2, placeholder="Type your text here..."),
    outputs=[gr.JSON(label="Top Responses"),
             # Read-only display; Textbox has no `disabled` argument
             gr.Textbox(label="Actual Response", interactive=False),
             gr.Number(label="Prediction time (s)")],
    # The example questions loaded at the top of the file; a flat list is
    # accepted because the interface has a single input component
    examples=questions_texts,
    title=title,
    description=description,
    article=article)

# Launch the demo
demo.launch()