Sharathhebbar24 committed on
Commit
ec1a337
·
verified ·
1 Parent(s): f652b33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -81
app.py CHANGED
@@ -1,96 +1,135 @@
1
- # import streamlit as st
2
- # import torch
 
 
3
  # from transformers import AutoTokenizer, AutoModel
4
  # from sentence_transformers import util
5
- # class SentenceSimiliarity():
6
 
7
- # def __init__(self, model_name, sentence1, sentence2):
8
- # self.sentence1 = sentence1
9
- # self.sentence2 = sentence2
10
- # self.model_name = model_name
11
- # self.model = AutoModel.from_pretrained(self.model_name)
12
- # self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- # def tokenize(self):
15
- # tokenized1 = self.tokenizer(
16
- # self.sentence1,
17
- # return_tensors='pt',
18
- # padding=True,
19
- # truncation=True
20
- # )
21
- # tokenized2 = self.tokenizer(
22
- # self.sentence2,
23
- # return_tensors='pt',
24
- # padding=True,
25
- # truncation=True
26
- # )
27
- # return tokenized1, tokenized2
28
 
29
- # def get_embeddings(self):
30
- # tokenized1, tokenized2 = self.tokenize()
31
- # with torch.no_grad():
32
- # embeddings1 = self.model(**tokenized1).last_hidden_state.mean(dim=1)
33
- # embeddings2 = self.model(**tokenized2).last_hidden_state.mean(dim=1)
34
- # return embeddings1, embeddings2
 
35
 
36
- # def get_similarity_scores(self):
37
- # embeddings1, embeddings2 = self.get_embeddings()
38
- # scores = util.cos_sim(embeddings1, embeddings2)
39
- # return scores
40
 
41
-
42
- # def results(self):
43
- # scores = self.get_similarity_scores()
44
- # statement = f"The sentence has {scores.item() * 100:.2f}% similarity"
45
- # return statement
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
- # class UI():
49
 
50
- # def __init__(self):
51
- # st.title("Sentence Similiarity Checker")
52
- # st.caption("You can use this for checking similarity between resume and job description")
53
 
54
- # def get(self):
55
- # self.sentence1 = st.text_area(
56
- # label="Sentence 1",
57
- # help="This is a parent text the next text will be compared with this text"
58
- # )
59
- # self.sentence2 = st.text_area(
60
- # label="Sentence 2",
61
- # help="This is a child text"
62
- # )
63
- # self.button = st.button(
64
- # label="Check",
65
- # help='Check Sentence Similarity'
66
- # )
 
 
 
 
 
 
67
 
68
- # def model_selection(self):
69
- # available_models = [
70
- # "distilbert-base-uncased",
71
- # "bert-base-uncased",
72
- # "sentence-transformers/all-MiniLM-L6-v2",
73
- # # "sentence-transformers/all-mpnet-base-v2",
74
- # # "intfloat/multilingual-e5-base",
75
- # # "togethercomputer/m2-bert-80M-32k-retrieval",
76
- # # "togethercomputer/m2-bert-80M-8k-retrieval",
77
- # # "togethercomputer/m2-bert-80M-2k-retrieval",
78
- # ]
79
- # model_name = st.sidebar.selectbox(
80
- # label="Select Your Models",
81
- # options=available_models,
82
- # )
83
- # return model_name
84
-
85
 
86
- # def result(self):
87
- # self.get()
88
- # model_name = self.model_selection()
89
- # ss = SentenceSimiliarity(model_name, self.sentence1, self.sentence2)
90
 
91
- # if self.button:
92
- # st.text(ss.results())
93
- # # print(ss.results())
94
 
95
- # ui = UI()
96
- # ui.result()
 
1
+ import os
2
+ import requests
3
+ import streamlit as st
4
+ import torch
5
  # from transformers import AutoTokenizer, AutoModel
6
  # from sentence_transformers import util
7
class SentenceSimiliarity():
    """Score how similar two texts are using the Hugging Face Inference API.

    The two sentences are posted to the hosted endpoint of the selected
    model; nothing is tokenized or embedded locally.

    Args:
        model_name: Hub model id appended to the Inference API URL.
        sentence1: Reference ("source") text.
        sentence2: Text compared against ``sentence1``.
    """

    # Models offered by ``model_selection``; disabled candidates stay listed
    # as comments so they are easy to re-enable.
    AVAILABLE_MODELS = [
        # "distilbert-base-uncased",
        # "bert-base-uncased",
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-mpnet-base-v2",
        "sentence-transformers/distiluse-base-multilingual-cased-v2",
        "intfloat/e5-small",
        "intfloat/e5-base",
        "intfloat/e5-large-v2",
        "intfloat/multilingual-e5-base",
        # "togethercomputer/m2-bert-80M-32k-retrieval",
        # "togethercomputer/m2-bert-80M-8k-retrieval",
        # "togethercomputer/m2-bert-80M-2k-retrieval",
    ]

    # Seconds to wait for the API so a stalled request cannot hang the app.
    REQUEST_TIMEOUT = 30

    def __init__(self, model_name, sentence1, sentence2):
        self.KEY = os.getenv("HF_KEY")
        # Only send an Authorization header when a token is configured;
        # otherwise the request would carry the literal "Bearer None".
        # NOTE(review): whether anonymous calls are accepted depends on the
        # API's current policy - confirm; failures are reported by results().
        self.headers = {"Authorization": f"Bearer {self.KEY}"} if self.KEY else {}
        self.sentence1 = sentence1
        self.sentence2 = sentence2
        self.model_name = model_name
        self.api_url = f"https://api-inference.huggingface.co/models/{model_name}"

    def model_selection(self):
        """Render the sidebar model picker and store the choice on ``self.model_name``."""
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=self.AVAILABLE_MODELS,
        )

    def query(self, payload):
        """POST ``payload`` as JSON to the model endpoint and return the decoded JSON body.

        Raises:
            requests.RequestException: on connection problems or timeout.
            ValueError: if the response body is not valid JSON.
        """
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.json()

    def results(self):
        """Return a human-readable statement with the similarity percentage.

        When the request fails or the API answers with something other than
        a list of scores (for example an ``{"error": ...}`` object), a
        statement describing the problem is returned instead of raising, so
        the caller can always display the result.
        """
        payload = {
            "inputs": {
                "source_sentence": self.sentence1,
                "sentences": [
                    self.sentence2,
                ],
            },
        }
        try:
            scores = self.query(payload)
        except (requests.RequestException, ValueError) as exc:
            return f"Could not compute similarity: {exc}"

        has_score = (
            isinstance(scores, (list, tuple))
            and len(scores) > 0
            and isinstance(scores[0], (int, float))
        )
        if has_score:
            return f"The sentence has {scores[0] * 100:.2f}% similarity"

        # Anything else is an error report or an unexpected shape.
        if isinstance(scores, dict) and "error" in scores:
            reason = scores["error"]
        else:
            reason = f"unexpected response {scores!r}"
        return f"Could not compute similarity: {reason}"
85
 
86
 
87
class UI():
    """Streamlit front end: collects two texts and a model, then shows their similarity."""

    # Models offered in the sidebar; disabled candidates stay listed as
    # comments so they are easy to re-enable.
    AVAILABLE_MODELS = [
        # "distilbert-base-uncased",
        # "bert-base-uncased",
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-mpnet-base-v2",
        "sentence-transformers/distiluse-base-multilingual-cased-v2",
        "intfloat/e5-small",
        "intfloat/e5-base",
        "intfloat/e5-large-v2",
        "intfloat/multilingual-e5-base",
        # "togethercomputer/m2-bert-80M-32k-retrieval",
        # "togethercomputer/m2-bert-80M-8k-retrieval",
        # "togethercomputer/m2-bert-80M-2k-retrieval",
    ]

    def __init__(self):
        st.title("Sentence Similiarity Checker")
        st.caption("You can use this for checking similarity between resume and job description")

    def get(self):
        """Render the input widgets and store their current values on ``self``.

        Sets ``model_name``, ``sentence1``, ``sentence2`` and ``button``.
        """
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=self.AVAILABLE_MODELS,
        )

        self.sentence1 = st.text_area(
            label="Sentence 1",
            help="This is a parent text the next text will be compared with this text"
        )
        self.sentence2 = st.text_area(
            label="Sentence 2",
            help="This is a child text"
        )
        self.button = st.button(
            label="Check",
            help='Check Sentence Similarity'
        )

    def result(self):
        """Render the widgets and, once "Check" is pressed, display the similarity statement."""
        self.get()

        # The script is re-executed on every interaction; do nothing further
        # until the user actually asks for a comparison.
        if not self.button:
            return

        # Avoid an API round-trip for input that cannot be compared.
        if not (self.sentence1 or "").strip() or not (self.sentence2 or "").strip():
            st.warning("Please enter text in both boxes before checking.")
            return

        ss = SentenceSimiliarity(self.model_name, self.sentence1, self.sentence2)
        st.text(ss.results())
133
 
134
# Build and render the page only when executed as a script, so importing this
# module has no UI side effects.
# NOTE(review): relies on `streamlit run` executing the script with
# __name__ == "__main__" - confirm for the deployed Streamlit version.
if __name__ == "__main__":
    ui = UI()
    ui.result()