Spaces:
Sleeping
Sleeping
bartman081523
committed on
Commit
•
c1f45eb
1
Parent(s):
e3a23f6
populate db with 3-word phrases
Browse files
- .gitattributes +1 -0
- app.py +68 -58
- gematria.db +2 -2
- gematria.db-journal +3 -0
.gitattributes
CHANGED
@@ -1 +1,2 @@
|
|
1 |
gematria.db filter=lfs diff=lfs merge=lfs -text
|
|
|
|
1 |
gematria.db filter=lfs diff=lfs merge=lfs -text
|
2 |
+
gematria.db-journal filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -8,19 +8,23 @@ from util import process_json_files
|
|
8 |
from gematria import calculate_gematria
|
9 |
from deep_translator import GoogleTranslator, exceptions
|
10 |
from urllib.parse import quote_plus
|
|
|
11 |
|
12 |
# Set up logging
|
13 |
-
logging.basicConfig(level=logging.
|
14 |
|
15 |
# Global variables for database connection, translator, and book names
|
16 |
conn = None
|
17 |
translator = None
|
18 |
book_names = {}
|
19 |
|
|
|
|
|
|
|
20 |
def initialize_database():
|
21 |
"""Initializes the SQLite database."""
|
22 |
global conn
|
23 |
-
conn = sqlite3.connect('gematria.db')
|
24 |
cursor = conn.cursor()
|
25 |
|
26 |
# Create tables if they don't exist
|
@@ -29,20 +33,18 @@ def initialize_database():
|
|
29 |
gematria_sum INTEGER,
|
30 |
words TEXT,
|
31 |
translation TEXT,
|
32 |
-
book TEXT,
|
33 |
chapter INTEGER,
|
34 |
verse INTEGER,
|
35 |
-
PRIMARY KEY (gematria_sum, book, chapter, verse)
|
36 |
)
|
37 |
''')
|
38 |
cursor.execute('''
|
39 |
CREATE TABLE IF NOT EXISTS processed_books (
|
40 |
-
book TEXT PRIMARY KEY,
|
41 |
max_phrase_length INTEGER
|
42 |
)
|
43 |
''')
|
44 |
-
conn.commit()
|
45 |
-
logging.info("Database initialized.")
|
46 |
|
47 |
def initialize_translator():
|
48 |
"""Initializes the Google Translator."""
|
@@ -56,7 +58,7 @@ def populate_database(start_book, end_book, max_phrase_length=1):
|
|
56 |
logging.info(f"Populating database with books from {start_book} to {end_book}...")
|
57 |
cursor = conn.cursor()
|
58 |
|
59 |
-
for book_id in range(start_book, end_book + 1):
|
60 |
book_data = process_json_files(book_id, book_id) # Get data for the single book
|
61 |
|
62 |
# process_json_files returns a dictionary with book_id as key,
|
@@ -84,43 +86,36 @@ def populate_database(start_book, end_book, max_phrase_length=1):
|
|
84 |
continue
|
85 |
|
86 |
chapters = book_data['text']
|
|
|
87 |
for chapter_id, chapter in enumerate(chapters):
|
88 |
-
if not isinstance(chapter, list):
|
89 |
-
logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
|
90 |
-
continue
|
91 |
for verse_id, verse in enumerate(chapter):
|
92 |
verse_text = flatten_text(verse)
|
93 |
-
# Remove text in square brackets
|
94 |
verse_text = re.sub(r'\[.*?\]', '', verse_text)
|
95 |
verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
|
96 |
verse_text = re.sub(r" +", " ", verse_text)
|
97 |
words = verse_text.split()
|
98 |
|
99 |
-
#
|
100 |
for length in range(1, max_phrase_length + 1):
|
101 |
for start in range(len(words) - length + 1):
|
102 |
phrase_candidate = " ".join(words[start:start + length])
|
103 |
gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
|
104 |
-
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
conn.commit()
|
109 |
-
logging.info("Database population complete.")
|
110 |
-
|
111 |
-
def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
|
112 |
-
"""Inserts a phrase and its Gematria value into the database."""
|
113 |
global conn
|
114 |
cursor = conn.cursor()
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
|
125 |
def get_translation(phrase):
|
126 |
"""Retrieves or generates the English translation of a Hebrew phrase."""
|
@@ -136,11 +131,10 @@ def get_translation(phrase):
|
|
136 |
else:
|
137 |
translation = translate_and_store(phrase)
|
138 |
cursor.execute('''
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
''', (translation, phrase))
|
143 |
-
conn.commit()
|
144 |
return translation
|
145 |
|
146 |
def translate_and_store(phrase):
|
@@ -178,14 +172,22 @@ def gematria_search_interface(phrase):
|
|
178 |
if not phrase.strip():
|
179 |
return "Please enter a phrase."
|
180 |
|
181 |
-
global conn, book_names
|
182 |
conn = sqlite3.connect('gematria.db')
|
183 |
cursor = conn.cursor()
|
184 |
|
185 |
phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
|
186 |
logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
|
187 |
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
if not matching_phrases:
|
190 |
return "No matching phrases found."
|
191 |
|
@@ -206,13 +208,13 @@ def gematria_search_interface(phrase):
|
|
206 |
translation = get_translation(words)
|
207 |
link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
|
208 |
results.append(f"""
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
results.append("</div>") # Close results-container div
|
217 |
|
218 |
conn.close()
|
@@ -221,28 +223,28 @@ def gematria_search_interface(phrase):
|
|
221 |
style = """
|
222 |
<style>
|
223 |
.results-container {
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
}
|
228 |
|
229 |
.result-item {
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
}
|
235 |
|
236 |
.hebrew-phrase {
|
237 |
-
|
238 |
-
|
239 |
}
|
240 |
|
241 |
.bible-link {
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
}
|
247 |
</style>
|
248 |
"""
|
@@ -261,8 +263,16 @@ def run_app():
|
|
261 |
initialize_translator()
|
262 |
|
263 |
# Pre-populate the database
|
264 |
-
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
iface = gr.Interface(
|
268 |
fn=gematria_search_interface,
|
|
|
8 |
from gematria import calculate_gematria
|
9 |
from deep_translator import GoogleTranslator, exceptions
|
10 |
from urllib.parse import quote_plus
|
11 |
+
from tqdm import tqdm # Import tqdm for progress bars
|
12 |
|
13 |
# Set up logging
|
14 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(filename)s - %(lineno)d - %(message)s')
|
15 |
|
16 |
# Global variables for database connection, translator, and book names
|
17 |
conn = None
|
18 |
translator = None
|
19 |
book_names = {}
|
20 |
|
21 |
+
# Pre-load Gematria values for common phrases to speed up search
|
22 |
+
gematria_cache = {}
|
23 |
+
|
24 |
def initialize_database():
|
25 |
"""Initializes the SQLite database."""
|
26 |
global conn
|
27 |
+
conn = sqlite3.connect('gematria.db', isolation_level=None) # Autocommit for faster insertion
|
28 |
cursor = conn.cursor()
|
29 |
|
30 |
# Create tables if they don't exist
|
|
|
33 |
gematria_sum INTEGER,
|
34 |
words TEXT,
|
35 |
translation TEXT,
|
36 |
+
book TEXT,
|
37 |
chapter INTEGER,
|
38 |
verse INTEGER,
|
39 |
+
PRIMARY KEY (gematria_sum, words, book, chapter, verse)
|
40 |
)
|
41 |
''')
|
42 |
cursor.execute('''
|
43 |
CREATE TABLE IF NOT EXISTS processed_books (
|
44 |
+
book TEXT PRIMARY KEY,
|
45 |
max_phrase_length INTEGER
|
46 |
)
|
47 |
''')
|
|
|
|
|
48 |
|
49 |
def initialize_translator():
|
50 |
"""Initializes the Google Translator."""
|
|
|
58 |
logging.info(f"Populating database with books from {start_book} to {end_book}...")
|
59 |
cursor = conn.cursor()
|
60 |
|
61 |
+
for book_id in tqdm(range(start_book, end_book + 1), desc="Processing Books"):
|
62 |
book_data = process_json_files(book_id, book_id) # Get data for the single book
|
63 |
|
64 |
# process_json_files returns a dictionary with book_id as key,
|
|
|
86 |
continue
|
87 |
|
88 |
chapters = book_data['text']
|
89 |
+
# Faster iteration with enumerate and list comprehension
|
90 |
for chapter_id, chapter in enumerate(chapters):
|
|
|
|
|
|
|
91 |
for verse_id, verse in enumerate(chapter):
|
92 |
verse_text = flatten_text(verse)
|
93 |
+
# Remove text in square brackets and non-Hebrew characters
|
94 |
verse_text = re.sub(r'\[.*?\]', '', verse_text)
|
95 |
verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
|
96 |
verse_text = re.sub(r" +", " ", verse_text)
|
97 |
words = verse_text.split()
|
98 |
|
99 |
+
# Use a generator to avoid building large lists in memory
|
100 |
for length in range(1, max_phrase_length + 1):
|
101 |
for start in range(len(words) - length + 1):
|
102 |
phrase_candidate = " ".join(words[start:start + length])
|
103 |
gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
|
104 |
+
yield gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1
|
105 |
|
106 |
+
def insert_phrases_to_db(phrases):
|
107 |
+
"""Inserts a list of phrases into the database efficiently."""
|
|
|
|
|
|
|
|
|
|
|
108 |
global conn
|
109 |
cursor = conn.cursor()
|
110 |
+
|
111 |
+
# Use executemany to insert multiple rows at once
|
112 |
+
cursor.executemany('''
|
113 |
+
INSERT OR IGNORE INTO results (gematria_sum, words, book, chapter, verse)
|
114 |
+
VALUES (?, ?, ?, ?, ?)
|
115 |
+
''', phrases)
|
116 |
+
|
117 |
+
# Commit the changes outside the loop for better performance
|
118 |
+
conn.commit()
|
119 |
|
120 |
def get_translation(phrase):
|
121 |
"""Retrieves or generates the English translation of a Hebrew phrase."""
|
|
|
131 |
else:
|
132 |
translation = translate_and_store(phrase)
|
133 |
cursor.execute('''
|
134 |
+
UPDATE results
|
135 |
+
SET translation = ?
|
136 |
+
WHERE words = ?
|
137 |
''', (translation, phrase))
|
|
|
138 |
return translation
|
139 |
|
140 |
def translate_and_store(phrase):
|
|
|
172 |
if not phrase.strip():
|
173 |
return "Please enter a phrase."
|
174 |
|
175 |
+
global conn, book_names, gematria_cache
|
176 |
conn = sqlite3.connect('gematria.db')
|
177 |
cursor = conn.cursor()
|
178 |
|
179 |
phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
|
180 |
logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
|
181 |
|
182 |
+
# Check if Gematria is in cache
|
183 |
+
if phrase_gematria in gematria_cache:
|
184 |
+
matching_phrases = gematria_cache[phrase_gematria]
|
185 |
+
else:
|
186 |
+
# Search in the database
|
187 |
+
matching_phrases = search_gematria_in_db(phrase_gematria)
|
188 |
+
# Cache the results for future searches
|
189 |
+
gematria_cache[phrase_gematria] = matching_phrases
|
190 |
+
|
191 |
if not matching_phrases:
|
192 |
return "No matching phrases found."
|
193 |
|
|
|
208 |
translation = get_translation(words)
|
209 |
link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
|
210 |
results.append(f"""
|
211 |
+
<div class='result-item'>
|
212 |
+
<p>Chapter: {chapter}, Verse: {verse}</p>
|
213 |
+
<p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
|
214 |
+
<p>Translation: {translation}</p>
|
215 |
+
<a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
|
216 |
+
</div>
|
217 |
+
""")
|
218 |
results.append("</div>") # Close results-container div
|
219 |
|
220 |
conn.close()
|
|
|
223 |
style = """
|
224 |
<style>
|
225 |
.results-container {
|
226 |
+
display: grid;
|
227 |
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
228 |
+
gap: 20px;
|
229 |
}
|
230 |
|
231 |
.result-item {
|
232 |
+
border: 1px solid #ccc;
|
233 |
+
padding: 15px;
|
234 |
+
border-radius: 5px;
|
235 |
+
box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
|
236 |
}
|
237 |
|
238 |
.hebrew-phrase {
|
239 |
+
font-family: 'SBL Hebrew', 'Ezra SIL', serif;
|
240 |
+
direction: rtl;
|
241 |
}
|
242 |
|
243 |
.bible-link {
|
244 |
+
display: block;
|
245 |
+
margin-top: 10px;
|
246 |
+
color: #007bff;
|
247 |
+
text-decoration: none;
|
248 |
}
|
249 |
</style>
|
250 |
"""
|
|
|
263 |
initialize_translator()
|
264 |
|
265 |
# Pre-populate the database
|
266 |
+
logging.info("Starting database population...")
|
267 |
+
phrases_to_insert = [] # Collect phrases before inserting in bulk
|
268 |
+
for gematria_sum, phrase, book, chapter, verse in tqdm(populate_database(1, 39, max_phrase_length=3), desc="Populating Database"): # Books 1 to 39
|
269 |
+
phrases_to_insert.append((gematria_sum, phrase, book, chapter, verse))
|
270 |
+
if len(phrases_to_insert) >= 1000: # Insert in batches of 1000 for efficiency
|
271 |
+
insert_phrases_to_db(phrases_to_insert)
|
272 |
+
phrases_to_insert = []
|
273 |
+
if phrases_to_insert: # Insert remaining phrases
|
274 |
+
insert_phrases_to_db(phrases_to_insert)
|
275 |
+
logging.info("Database population complete.")
|
276 |
|
277 |
iface = gr.Interface(
|
278 |
fn=gematria_search_interface,
|
gematria.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8ae62432a24f36453d7080be5d95162d949ac77f006db0a0a81000c40abcabf
|
3 |
+
size 65466368
|
gematria.db-journal
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2ff707a32b432b7883756f3d0db45fc3031ffefa7f890197ac1d9bed4fb9849
|
3 |
+
size 3884488
|