Sharathhebbar24 committed on
Commit
f652b33
·
verified ·
1 Parent(s): 8e5d078

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -81
app.py CHANGED
@@ -1,96 +1,96 @@
1
- import streamlit as st
2
- import torch
3
- from transformers import AutoTokenizer, AutoModel
4
- from sentence_transformers import util
5
- class SentenceSimiliarity():
6
 
7
- def __init__(self, model_name, sentence1, sentence2):
8
- self.sentence1 = sentence1
9
- self.sentence2 = sentence2
10
- self.model_name = model_name
11
- self.model = AutoModel.from_pretrained(self.model_name)
12
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
13
 
14
- def tokenize(self):
15
- tokenized1 = self.tokenizer(
16
- self.sentence1,
17
- return_tensors='pt',
18
- padding=True,
19
- truncation=True
20
- )
21
- tokenized2 = self.tokenizer(
22
- self.sentence2,
23
- return_tensors='pt',
24
- padding=True,
25
- truncation=True
26
- )
27
- return tokenized1, tokenized2
28
 
29
- def get_embeddings(self):
30
- tokenized1, tokenized2 = self.tokenize()
31
- with torch.no_grad():
32
- embeddings1 = self.model(**tokenized1).last_hidden_state.mean(dim=1)
33
- embeddings2 = self.model(**tokenized2).last_hidden_state.mean(dim=1)
34
- return embeddings1, embeddings2
35
 
36
- def get_similarity_scores(self):
37
- embeddings1, embeddings2 = self.get_embeddings()
38
- scores = util.cos_sim(embeddings1, embeddings2)
39
- return scores
40
 
41
 
42
- def results(self):
43
- scores = self.get_similarity_scores()
44
- statement = f"The sentence has {scores.item() * 100:.2f}% similarity"
45
- return statement
46
 
47
 
48
- class UI():
49
 
50
- def __init__(self):
51
- st.title("Sentence Similiarity Checker")
52
- st.caption("You can use this for checking similarity between resume and job description")
53
 
54
- def get(self):
55
- self.sentence1 = st.text_area(
56
- label="Sentence 1",
57
- help="This is a parent text the next text will be compared with this text"
58
- )
59
- self.sentence2 = st.text_area(
60
- label="Sentence 2",
61
- help="This is a child text"
62
- )
63
- self.button = st.button(
64
- label="Check",
65
- help='Check Sentence Similarity'
66
- )
67
 
68
- def model_selection(self):
69
- available_models = [
70
- "distilbert-base-uncased",
71
- "bert-base-uncased",
72
- "sentence-transformers/all-MiniLM-L6-v2",
73
- # "sentence-transformers/all-mpnet-base-v2",
74
- # "intfloat/multilingual-e5-base",
75
- # "togethercomputer/m2-bert-80M-32k-retrieval",
76
- # "togethercomputer/m2-bert-80M-8k-retrieval",
77
- # "togethercomputer/m2-bert-80M-2k-retrieval",
78
- ]
79
- model_name = st.sidebar.selectbox(
80
- label="Select Your Models",
81
- options=available_models,
82
- )
83
- return model_name
84
 
85
 
86
- def result(self):
87
- self.get()
88
- model_name = self.model_selection()
89
- ss = SentenceSimiliarity(model_name, self.sentence1, self.sentence2)
90
 
91
- if self.button:
92
- st.text(ss.results())
93
- # print(ss.results())
94
 
95
- ui = UI()
96
- ui.result()
 
1
+ # import streamlit as st
2
+ # import torch
3
+ # from transformers import AutoTokenizer, AutoModel
4
+ # from sentence_transformers import util
5
+ # class SentenceSimiliarity():
6
 
7
+ # def __init__(self, model_name, sentence1, sentence2):
8
+ # self.sentence1 = sentence1
9
+ # self.sentence2 = sentence2
10
+ # self.model_name = model_name
11
+ # self.model = AutoModel.from_pretrained(self.model_name)
12
+ # self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
13
 
14
+ # def tokenize(self):
15
+ # tokenized1 = self.tokenizer(
16
+ # self.sentence1,
17
+ # return_tensors='pt',
18
+ # padding=True,
19
+ # truncation=True
20
+ # )
21
+ # tokenized2 = self.tokenizer(
22
+ # self.sentence2,
23
+ # return_tensors='pt',
24
+ # padding=True,
25
+ # truncation=True
26
+ # )
27
+ # return tokenized1, tokenized2
28
 
29
+ # def get_embeddings(self):
30
+ # tokenized1, tokenized2 = self.tokenize()
31
+ # with torch.no_grad():
32
+ # embeddings1 = self.model(**tokenized1).last_hidden_state.mean(dim=1)
33
+ # embeddings2 = self.model(**tokenized2).last_hidden_state.mean(dim=1)
34
+ # return embeddings1, embeddings2
35
 
36
+ # def get_similarity_scores(self):
37
+ # embeddings1, embeddings2 = self.get_embeddings()
38
+ # scores = util.cos_sim(embeddings1, embeddings2)
39
+ # return scores
40
 
41
 
42
+ # def results(self):
43
+ # scores = self.get_similarity_scores()
44
+ # statement = f"The sentence has {scores.item() * 100:.2f}% similarity"
45
+ # return statement
46
 
47
 
48
+ # class UI():
49
 
50
+ # def __init__(self):
51
+ # st.title("Sentence Similiarity Checker")
52
+ # st.caption("You can use this for checking similarity between resume and job description")
53
 
54
+ # def get(self):
55
+ # self.sentence1 = st.text_area(
56
+ # label="Sentence 1",
57
+ # help="This is a parent text the next text will be compared with this text"
58
+ # )
59
+ # self.sentence2 = st.text_area(
60
+ # label="Sentence 2",
61
+ # help="This is a child text"
62
+ # )
63
+ # self.button = st.button(
64
+ # label="Check",
65
+ # help='Check Sentence Similarity'
66
+ # )
67
 
68
+ # def model_selection(self):
69
+ # available_models = [
70
+ # "distilbert-base-uncased",
71
+ # "bert-base-uncased",
72
+ # "sentence-transformers/all-MiniLM-L6-v2",
73
+ # # "sentence-transformers/all-mpnet-base-v2",
74
+ # # "intfloat/multilingual-e5-base",
75
+ # # "togethercomputer/m2-bert-80M-32k-retrieval",
76
+ # # "togethercomputer/m2-bert-80M-8k-retrieval",
77
+ # # "togethercomputer/m2-bert-80M-2k-retrieval",
78
+ # ]
79
+ # model_name = st.sidebar.selectbox(
80
+ # label="Select Your Models",
81
+ # options=available_models,
82
+ # )
83
+ # return model_name
84
 
85
 
86
+ # def result(self):
87
+ # self.get()
88
+ # model_name = self.model_selection()
89
+ # ss = SentenceSimiliarity(model_name, self.sentence1, self.sentence2)
90
 
91
+ # if self.button:
92
+ # st.text(ss.results())
93
+ # # print(ss.results())
94
 
95
+ # ui = UI()
96
+ # ui.result()