levimack committed on
Commit
21e50d6
1 Parent(s): f400874

Create scripture_compare.py

Browse files
Files changed (1) hide show
  1. scripture_compare.py +85 -0
scripture_compare.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request, json
2
+ from transformers.utils import logging
3
+ logging.set_verbosity_error()
4
+
5
+ from sentence_transformers import SentenceTransformer
6
+ from sentence_transformers import util
7
+
8
+ # Search URL template for the Mackabee Ministries scripture API. The two {} placeholders are filled with the URL-quoted search text and the version code; the response is parsed as JSON.
9
+ SEARCH_URL = 'https://dd4-biblical.appspot.com/_api/scriptures/v1/search?searchText={}&lang=en&version={}'
10
+ STANDARD_VERSION = 'RSKJ' # Standard text: normally RSKJ (Restored King James Version); NRSV may also be used.
11
+ COMPARISON_VERSION = 'SEP' # Comparison text: the Septuagint ("SEP") for most comparisons; DSS is also available for Isa (presumably Isaiah - confirm).
12
+
13
+
14
+ # The reference scriptures to compare. This string is sent as the search text, and the search can parse many different
15
+ # reference formats.
16
+ REFERENCES = 'Gen 1'
17
+
18
+
19
+ # Candidate helper class that holds a scripture, the comparison text and the resulting match score.
20
class Candidate:
    """A single verse paired with its comparison text and match score.

    Attributes:
        book, chapter, verse: The parts of the verse reference.
        standardText: The verse text from the standard version.
        compareText: The verse text from the comparison version ('' until set).
        score: The match score for the two texts (0 until computed).
    """

    # Class-level defaults; instances overwrite these once the comparison
    # text has been attached and the pair has been scored.
    compareText = ''
    score = 0

    def __init__(self, book, chapter, verse, standardText):
        self.book = book
        self.chapter = chapter
        self.verse = verse
        self.standardText = standardText

    def reference(self):
        """Return the verse reference formatted as 'Book Chapter:Verse'."""
        return '{} {}:{}'.format(self.book, self.chapter, self.verse)

    @staticmethod
    def _quoted(text):
        """Wrap text in double quotes, doubling any embedded double quotes.

        Verse text can itself contain double quotes; doubling them keeps the
        quoted field well-formed when the row is read back as CSV.
        """
        return '"{}"'.format(str(text).replace('"', '""'))

    def __str__(self):
        """Return a CSV-style row: reference, score to 4 decimals, both texts."""
        return '{},{:.4f},{},{}'.format(
            self.reference(),
            self.score,
            self._quoted(self.standardText),
            self._quoted(self.compareText),
        )

    def __repr__(self):
        # Same rendering as str() so lists of candidates print as rows too.
        return str(self)
38
+
39
def compare(model, candidate):
    """Score how closely a candidate's two texts match and store the result.

    Encodes candidate.standardText and candidate.compareText with
    model.encode(..., convert_to_tensor=True), computes their cosine
    similarity with util.cos_sim, and writes the value to candidate.score.

    Args:
        model: Object providing encode(text, convert_to_tensor=True); the
            script passes a SentenceTransformer.
        candidate: Object with standardText and compareText attributes; its
            score attribute is overwritten.
    """
    standard_embedding = model.encode(candidate.standardText, convert_to_tensor=True)
    compare_embedding = model.encode(candidate.compareText, convert_to_tensor=True)

    # cos_sim returns a matrix of pairwise scores; with a single text on each
    # side the only entry is [0][0].
    cosine_scores = util.cos_sim(standard_embedding, compare_embedding)

    # Store a plain float rather than a tensor element so the score has the
    # same kind of value as the numeric default on Candidate.
    candidate.score = float(cosine_scores[0][0])
49
+
50
+
51
def _fetch_items(version):
    """Return the 'items' list from a search for REFERENCES in `version`."""
    # Imported explicitly instead of relying on urllib.request having loaded
    # urllib.parse as a side effect of its own imports.
    from urllib.parse import quote

    search_url = SEARCH_URL.format(quote(REFERENCES), version)
    with urllib.request.urlopen(search_url) as url:
        return json.load(url)['items']


def main():
    """Fetch both versions of REFERENCES, score each verse pair, print rows."""
    candidates = []
    candidate_map = {}

    # First fetch the standard translation; each verse becomes a candidate.
    for result in _fetch_items(STANDARD_VERSION):
        candidate = Candidate(result['book'], result['chapter'], result['verse'], result['text'])
        candidates.append(candidate)
        candidate_map[candidate.reference()] = candidate

    # Then fetch the comparison text and attach it to the matching candidate.
    for result in _fetch_items(COMPARISON_VERSION):
        key = '{} {}:{}'.format(result['book'], result['chapter'], result['verse'])
        # .get() rather than [] so a verse that exists only in the comparison
        # version yields None here instead of raising KeyError.
        candidate = candidate_map.get(key)
        if candidate is None:
            candidate = Candidate(result['book'], result['chapter'], result['verse'], '')
            candidates.append(candidate)
            candidate_map[candidate.reference()] = candidate

        candidate.compareText = result['text']

    model = SentenceTransformer("all-MiniLM-L6-v2")

    for candidate in candidates:
        compare(model, candidate)
        print(candidate)


if __name__ == '__main__':
    main()