bartman081523 committed on
Commit
96b58ba
1 Parent(s): f83d5b7

add numbers search

Browse files
Files changed (1) hide show
  1. app.py +322 -311
app.py CHANGED
@@ -4,327 +4,338 @@ import re
4
  import sqlite3
5
  import logging
6
  from collections import defaultdict
 
 
7
  from util import process_json_files
8
  from gematria import calculate_gematria
9
  from deep_translator import GoogleTranslator, exceptions
10
  from urllib.parse import quote_plus
11
- from tqdm import tqdm # Import tqdm for progress bars
 
 
 
 
 
12
 
13
  # Set up logging
14
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(filename)s - %(lineno)d - %(message)s')
15
 
16
- # Global variables for database connection, translator, and book names
17
- conn = None
18
- translator = None
19
- book_names = {}
20
-
21
- # Pre-load Gematria values for common phrases to speed up search
22
- gematria_cache = {}
23
-
24
- # Dictionary to store translations
25
- translation_cache = {}
26
-
27
- def initialize_database():
28
- """Initializes the SQLite database."""
29
- global conn
30
- conn = sqlite3.connect('gematria.db', isolation_level=None) # Autocommit for faster insertion
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  cursor = conn.cursor()
32
-
33
- # Create tables if they don't exist
34
  cursor.execute('''
35
- CREATE TABLE IF NOT EXISTS results (
36
- gematria_sum INTEGER,
37
- words TEXT,
38
- translation TEXT,
39
- book TEXT,
40
- chapter INTEGER,
41
- verse INTEGER,
42
- PRIMARY KEY (gematria_sum, words, book, chapter, verse)
43
- )
44
- ''')
45
- cursor.execute('''
46
- CREATE TABLE IF NOT EXISTS processed_books (
47
- book TEXT PRIMARY KEY,
48
- max_phrase_length INTEGER
49
- )
50
- ''')
51
- cursor.execute('''
52
- CREATE TABLE IF NOT EXISTS translations (
53
- hebrew_phrase TEXT PRIMARY KEY,
54
- english_translation TEXT
55
- )
56
- ''')
57
-
58
- def initialize_translator():
59
- """Initializes the Google Translator."""
60
- global translator
61
- translator = GoogleTranslator(source='iw', target='en')
62
- logging.info("Translator initialized.")
63
-
64
- def populate_database(start_book, end_book, max_phrase_length=1):
65
- """Populates the database with phrases from the Tanach and their Gematria values."""
66
- global conn, book_names
67
- logging.info(f"Populating database with books from {start_book} to {end_book}...")
68
- cursor = conn.cursor()
69
-
70
- for book_id in tqdm(range(start_book, end_book + 1), desc="Processing Books"):
71
- book_data = process_json_files(book_id, book_id) # Get data for the single book
72
-
73
- # process_json_files returns a dictionary with book_id as key,
74
- # so access the book data directly
75
- if book_id in book_data:
76
- book_data = book_data[book_id]
77
- if 'title' not in book_data or not isinstance(book_data['title'], str):
78
- logging.warning(f"Skipping book {book_id} due to missing or invalid 'title' field.")
79
- continue
80
-
81
- title = book_data['title']
82
- book_names[book_id] = title
83
-
84
- # Check if the book is already processed for this max_phrase_length
85
- cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
86
- result = cursor.fetchone()
87
- if result and result[0] >= max_phrase_length:
88
- logging.info(f"Skipping book {title}: Already processed with max_phrase_length {result[0]}")
89
- continue
90
-
91
- logging.info(f"Processing book {title} with max_phrase_length {max_phrase_length}")
92
-
93
- if 'text' not in book_data or not isinstance(book_data['text'], list):
94
- logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
95
- continue
96
-
97
- chapters = book_data['text']
98
- # Faster iteration with enumerate and list comprehension
99
- for chapter_id, chapter in enumerate(chapters):
100
- for verse_id, verse in enumerate(chapter):
101
- verse_text = flatten_text(verse)
102
- # Remove text in square brackets and non-Hebrew characters
103
- verse_text = re.sub(r'\[.*?\]', '', verse_text)
104
- verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
105
- verse_text = re.sub(r" +", " ", verse_text)
106
- words = verse_text.split()
107
-
108
- # Use a generator to avoid building large lists in memory
109
- for length in range(1, max_phrase_length + 1):
110
- for start in range(len(words) - length + 1):
111
- phrase_candidate = " ".join(words[start:start + length])
112
- gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
113
- yield gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1
114
-
115
- # Mark the book as processed with the current max_phrase_length
116
- cursor.execute('''
117
- INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
118
- VALUES (?, ?)
119
- ''', (title, max_phrase_length))
120
-
121
- def insert_phrases_to_db(phrases):
122
- """Inserts a list of phrases into the database efficiently."""
123
- global conn
124
- cursor = conn.cursor()
125
-
126
- # Use executemany to insert multiple rows at once
127
- cursor.executemany('''
128
- INSERT OR IGNORE INTO results (gematria_sum, words, book, chapter, verse)
129
- VALUES (?, ?, ?, ?, ?)
130
- ''', phrases)
131
-
132
- # Commit the changes outside the loop for better performance
133
- conn.commit()
134
-
135
- def get_translation(phrase):
136
- """Retrieves or generates the English translation of a Hebrew phrase."""
137
- global translator, conn, translation_cache
138
- if phrase in translation_cache:
139
- return translation_cache[phrase]
140
- else:
141
- cursor = conn.cursor()
142
- cursor.execute('''
143
- SELECT english_translation FROM translations
144
- WHERE hebrew_phrase = ?
145
- ''', (phrase,))
146
- result = cursor.fetchone()
147
- if result and result[0]:
148
- translation = result[0]
149
- return translation
150
- else:
151
- translation = translate_and_store(phrase)
152
- cursor.execute('''
153
- INSERT OR IGNORE INTO translations (hebrew_phrase, english_translation)
154
- VALUES (?, ?)
155
- ''', (phrase, translation))
156
- return translation
157
-
158
- def translate_and_store(phrase):
159
- """Translates a Hebrew phrase to English using Google Translate and handles potential errors."""
160
- global translator
161
- max_retries = 3
162
- retries = 0
163
-
164
- while retries < max_retries:
165
- try:
166
- translation = translator.translate(phrase)
167
- logging.debug(f"Translated phrase: {translation}")
168
- return translation
169
- except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
170
- exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
171
- retries += 1
172
- logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
173
-
174
- logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
175
- return "[Translation Error]"
176
-
177
- def search_gematria_in_db(gematria_sum, max_words):
178
- """Searches the database for phrases with a given Gematria value and word count.
179
- Returns phrases with word count <= max_words."""
180
- global conn
181
- cursor = conn.cursor()
182
- logging.debug(f"Searching for phrases with Gematria: {gematria_sum} and max words: {max_words}")
183
- cursor.execute('''
184
- SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
185
- ''', (gematria_sum,)) # Retrieve all matching phrases first
186
- results = cursor.fetchall()
187
- filtered_results = []
188
- logging.debug(f"Found {len(results)} matching phrases before filtering.")
189
- for words, book, chapter, verse in results:
190
- # Filter by word count (including phrases with fewer words)
191
- word_count = len(words.split()) # Correctly split and count words
192
- logging.debug(f"Word count for '{words}': {word_count}")
193
- if word_count <= max_words: # Include phrases with word count <= max_words
194
- filtered_results.append((words, book, chapter, verse))
195
- logging.debug(f"Found {len(filtered_results)} matching phrases after filtering.")
196
- return filtered_results
197
-
198
- def gematria_search_interface(phrase, max_words, show_translation):
199
- """The main function for the Gradio interface."""
200
- if not phrase.strip():
201
- return "Please enter a phrase."
202
-
203
- global conn, book_names, gematria_cache
204
- conn = sqlite3.connect('gematria.db')
205
- cursor = conn.cursor()
206
-
207
- phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
208
- logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
209
-
210
- # Debugging output
211
- logging.debug(f"Phrase Gematria: {phrase_gematria}")
212
- logging.debug(f"Max Words: {max_words}")
213
-
214
- # Check if Gematria is in cache for the specific max_words value
215
- if (phrase_gematria, max_words) in gematria_cache:
216
- matching_phrases = gematria_cache[(phrase_gematria, max_words)]
217
- logging.debug(f"Retrieved matching phrases from cache for max_words: {max_words}.")
218
  else:
219
- # Search in the database
220
- matching_phrases = search_gematria_in_db(phrase_gematria, max_words)
221
- # Cache the results with the max_words value
222
- gematria_cache[(phrase_gematria, max_words)] = matching_phrases
223
- logging.debug(f"Retrieved matching phrases from database for max_words: {max_words}.")
224
-
225
- if not matching_phrases:
226
- return "No matching phrases found."
227
-
228
- # Sort results by book, chapter, and verse
229
- sorted_phrases = sorted(matching_phrases, key=lambda x: (int(list(book_names.keys())[list(book_names.values()).index(x[1])]), x[2], x[3]))
230
- logging.debug(f"Sorted matching phrases: {sorted_phrases}")
231
-
232
- # Group results by book
233
- results_by_book = defaultdict(list)
234
- for words, book, chapter, verse in sorted_phrases:
235
- results_by_book[book].append((words, chapter, verse))
236
- logging.debug(f"Grouped results by book: {results_by_book}")
237
-
238
- # Format results for display
239
- results = []
240
- results.append("<div class='results-container'>")
241
- for book, phrases in results_by_book.items():
242
- results.append(f"<h4>Book: {book}</h4>") # Directly display book name
243
- for words, chapter, verse in phrases:
244
- translation = get_translation(words) if show_translation else ""
245
- link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
246
- results.append(f"""
247
- <div class='result-item'>
248
- <p>Chapter: {chapter}, Verse: {verse}</p>
249
- <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
250
- <p>Translation: {translation}</p>
251
- <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
252
- </div>
253
- """)
254
- results.append("</div>") # Close results-container div
255
-
256
- conn.close()
257
-
258
- # Add CSS styling
259
- style = """
260
- <style>
261
- .results-container {
262
- display: grid;
263
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
264
- gap: 20px;
265
- }
266
-
267
- .result-item {
268
- border: 1px solid #ccc;
269
- padding: 15px;
270
- border-radius: 5px;
271
- box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
272
- }
273
-
274
- .hebrew-phrase {
275
- font-family: 'SBL Hebrew', 'Ezra SIL', serif;
276
- direction: rtl;
277
- }
278
-
279
- .bible-link {
280
- display: block;
281
- margin-top: 10px;
282
- color: #007bff;
283
- text-decoration: none;
284
- }
285
- </style>
286
- """
287
-
288
- return style + "\n".join(results)
289
-
290
- def flatten_text(text):
291
- """Helper function to flatten nested lists into a single list."""
292
- if isinstance(text, list):
293
- return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
294
- return text
295
-
296
- def run_app():
297
- """Initializes and launches the Gradio app."""
298
- initialize_database()
299
- initialize_translator()
300
-
301
- # Pre-populate the database
302
- logging.info("Starting database population...")
303
- phrases_to_insert = [] # Collect phrases before inserting in bulk
304
- for max_phrase_length in range(1, 6): # Populate for phrases up to 5 words
305
- for gematria_sum, phrase, book, chapter, verse in tqdm(populate_database(1, 39, max_phrase_length=max_phrase_length), desc=f"Populating Database (Max Length: {max_phrase_length})"): # Books 1 to 39
306
- phrases_to_insert.append((gematria_sum, phrase, book, chapter, verse))
307
- if len(phrases_to_insert) >= 1000: # Insert in batches of 1000 for efficiency
308
- insert_phrases_to_db(phrases_to_insert)
309
- phrases_to_insert = []
310
- if phrases_to_insert: # Insert remaining phrases
311
- insert_phrases_to_db(phrases_to_insert)
312
- logging.info("Database population complete.")
313
-
314
- iface = gr.Interface(
315
- fn=gematria_search_interface,
316
- inputs=[
317
- gr.Textbox(label="Enter phrase"),
318
- gr.Number(label="Max Word Count in Results", value=1, minimum=1, maximum=10),
319
- gr.Checkbox(label="Show Translation", value=True)
320
- ],
321
- outputs=gr.HTML(label="Results"),
322
- title="Gematria Search in Tanach",
323
- description="Search for phrases in the Tanach that have the same Gematria value.",
324
- live=False,
325
- allow_flagging="never"
326
- )
327
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
 
329
  if __name__ == "__main__":
330
- run_app()
 
4
import html
import logging
import sqlite3
from collections import defaultdict
from typing import Dict, Iterator, List, Optional, Tuple
from urllib.parse import quote_plus

from deep_translator import GoogleTranslator, exceptions
from tqdm import tqdm  # Import tqdm for progress bars

from gematria import calculate_gematria
from util import process_json_files
14
+
15
+ # Constants
16
+ DATABASE_FILE = 'gematria.db'
17
+ MAX_PHRASE_LENGTH = 5 # Populate database for phrases up to 5 words
18
+ BATCH_SIZE = 1000 # Insert phrases into database in batches
19
 
20
  # Set up logging
21
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(filename)s - %(lineno)d - %(message)s')
22
 
23
# Module-level state shared by the functions in this file.
# `conn` and `translator` start out as None and are assigned later by
# initialize_database() / initialize_translator(), hence Optional.
conn: Optional[sqlite3.Connection] = None
translator: Optional[GoogleTranslator] = None
# Book id -> book title; filled in by populate_database().
book_names: Dict[int, str] = {}
# (gematria value, max word count) -> list of (words, book, chapter, verse).
gematria_cache: Dict[Tuple[int, int], List[Tuple[str, str, int, int]]] = {}
# Hebrew phrase -> English translation.
translation_cache: Dict[str, str] = {}
29
+
30
def initialize_database() -> None:
    """Open the SQLite database and create the tables used by the app.

    The connection is stored in the module-level ``conn``. All three
    ``CREATE TABLE`` statements use ``IF NOT EXISTS``, so calling this on an
    existing database leaves its tables in place.
    """
    global conn
    # isolation_level=None puts the sqlite3 connection into autocommit mode.
    conn = sqlite3.connect(DATABASE_FILE, isolation_level=None)

    schema_statements = (
        # One row per phrase occurrence, keyed by value, text and location.
        '''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT,
            translation TEXT,
            book TEXT,
            chapter INTEGER,
            verse INTEGER,
            PRIMARY KEY (gematria_sum, words, book, chapter, verse)
        )
        ''',
        # Records the longest phrase length each book has been processed with.
        '''
        CREATE TABLE IF NOT EXISTS processed_books (
            book TEXT PRIMARY KEY,
            max_phrase_length INTEGER
        )
        ''',
        # Persistent Hebrew -> English translation store.
        '''
        CREATE TABLE IF NOT EXISTS translations (
            hebrew_phrase TEXT PRIMARY KEY,
            english_translation TEXT
        )
        ''',
    )

    db_cursor = conn.cursor()
    for statement in schema_statements:
        db_cursor.execute(statement)
60
+
61
def initialize_translator() -> None:
    """Create the Google translator and store it in the module-level ``translator``.

    The translator is configured with source language code ``'iw'`` and
    target language code ``'en'``.
    """
    global translator
    hebrew_to_english = GoogleTranslator(source='iw', target='en')
    translator = hebrew_to_english
    logging.info("Translator initialized.")
66
+
67
def populate_database(start_book: int, end_book: int, max_phrase_length: int = 1) -> Iterator[Tuple[int, str, str, int, int]]:
    """Yield every phrase of the requested books together with its Gematria value.

    Despite its name this function is a generator: it does NOT insert phrases
    into the ``results`` table itself. The caller must iterate over it and
    store the yielded rows (see ``insert_phrases_to_db``). The only database
    writes done here are to ``processed_books``.

    Args:
        start_book: Id of the first book to process (inclusive).
        end_book: Id of the last book to process (inclusive).
        max_phrase_length: Longest phrase, in words, to generate. All lengths
            from 1 up to this value are produced.

    Yields:
        ``(gematria_sum, phrase, book_title, chapter, verse)`` tuples, with
        chapter and verse numbered from 1.
    """
    global conn, book_names
    logging.info(f"Populating database with books from {start_book} to {end_book}...")
    cursor = conn.cursor()

    # Compiled once instead of being looked up again for every verse.
    bracketed_text = re.compile(r'\[.*?\]')
    non_hebrew = re.compile(r"[^\u05D0-\u05EA ]+")

    for book_id in tqdm(range(start_book, end_book + 1), desc="Processing Books"):
        book_data = process_json_files(book_id, book_id)  # Get data for the single book

        # process_json_files is expected to return a dict keyed by book id;
        # books it did not return are silently skipped.
        if book_id not in book_data:
            continue
        book_data = book_data[book_id]

        if 'title' not in book_data or not isinstance(book_data['title'], str):
            logging.warning(f"Skipping book {book_id} due to missing or invalid 'title' field.")
            continue

        title = book_data['title']
        # Recorded even when the book is skipped below, so that the search
        # interface can still order results by book id.
        book_names[book_id] = title

        # Skip books already processed with an equal or larger phrase length.
        cursor.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (title,))
        result = cursor.fetchone()
        if result and result[0] >= max_phrase_length:
            logging.info(f"Skipping book {title}: Already processed with max_phrase_length {result[0]}")
            continue

        logging.info(f"Processing book {title} with max_phrase_length {max_phrase_length}")

        if 'text' not in book_data or not isinstance(book_data['text'], list):
            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
            continue

        for chapter_id, chapter in enumerate(book_data['text']):
            for verse_id, verse in enumerate(chapter):
                verse_text = flatten_text(verse)
                # Drop bracketed annotations, then everything that is not a
                # Hebrew letter (U+05D0..U+05EA) or a space.
                verse_text = bracketed_text.sub('', verse_text)
                verse_text = non_hebrew.sub('', verse_text)
                # split() without arguments already collapses runs of spaces.
                words = verse_text.split()

                # Sliding window over the verse for every phrase length.
                for length in range(1, max_phrase_length + 1):
                    for start in range(len(words) - length + 1):
                        phrase_candidate = " ".join(words[start:start + length])
                        gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
                        yield gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1

        # Mark the book as processed with the current max_phrase_length.
        # NOTE: this runs when the consumer asks for the item after the
        # book's last phrase, i.e. possibly before the consumer has written
        # its final batch for this book.
        cursor.execute('''
            INSERT OR REPLACE INTO processed_books (book, max_phrase_length)
            VALUES (?, ?)
        ''', (title, max_phrase_length))
123
+
124
def insert_phrases_to_db(phrases: List[Tuple[int, str, str, int, int]]) -> None:
    """Insert a batch of phrases into ``results`` inside a single transaction.

    Args:
        phrases: ``(gematria_sum, words, book, chapter, verse)`` rows. Rows
            whose primary key already exists are ignored.

    Raises:
        sqlite3.Error: If the insert fails. A transaction opened by this
            function is rolled back first, so no partial batch is written.
    """
    global conn
    cursor = conn.cursor()

    # The connection created by initialize_database() is in autocommit mode
    # (isolation_level=None). There, executemany() would commit after every
    # single row and the commit() below would be a no-op, defeating the
    # batching. Opening the transaction explicitly makes the batch one unit.
    owns_transaction = not conn.in_transaction
    if owns_transaction:
        cursor.execute('BEGIN')

    try:
        cursor.executemany('''
            INSERT OR IGNORE INTO results (gematria_sum, words, book, chapter, verse)
            VALUES (?, ?, ?, ?, ?)
        ''', phrases)
    except Exception:
        # Only undo work that belongs to the transaction started here.
        if owns_transaction:
            conn.rollback()
        raise

    conn.commit()
137
+
138
def get_translation(phrase: str) -> str:
    """Return the English translation of a Hebrew phrase, using caches first.

    Lookup order: the in-memory ``translation_cache``, then the
    ``translations`` table, then ``translate_and_store``. A successful result
    is written back to both the table and the in-memory cache.

    Args:
        phrase: The Hebrew phrase to translate.

    Returns:
        The translation, or the ``"[Translation Error]"`` marker returned by
        ``translate_and_store`` when translation failed. The marker is not
        cached, so a later call retries the translation.
    """
    global translator, conn, translation_cache
    if phrase in translation_cache:
        return translation_cache[phrase]

    cursor = conn.cursor()
    cursor.execute('''
        SELECT english_translation FROM translations
        WHERE hebrew_phrase = ?
    ''', (phrase,))
    result = cursor.fetchone()

    if result and result[0]:
        translation = result[0]
    else:
        translation = translate_and_store(phrase)
        if translation == "[Translation Error]":
            # Do not persist the failure marker; otherwise the phrase would
            # be served as "[Translation Error]" forever.
            return translation
        # REPLACE (not IGNORE) so that an existing row holding an empty
        # translation is actually updated.
        cursor.execute('''
            INSERT OR REPLACE INTO translations (hebrew_phrase, english_translation)
            VALUES (?, ?)
        ''', (phrase, translation))
        # The search interface opens a non-autocommit connection and closes
        # it without committing; without this the row would be rolled back.
        conn.commit()

    # The original consulted this cache but never filled it.
    translation_cache[phrase] = translation
    return translation
160
+
161
def translate_and_store(phrase: str) -> str:
    """Translate a Hebrew phrase to English, retrying on translator errors.

    Note that, despite the name, nothing is stored here; persisting the
    result is left to the caller.

    Args:
        phrase: The Hebrew phrase to translate.

    Returns:
        The translation returned by the module-level ``translator``, or the
        literal ``"[Translation Error]"`` after three failed attempts.
    """
    global translator
    max_retries = 3

    for attempt in range(1, max_retries + 1):
        try:
            translation = translator.translate(phrase)
        # OSError replaces `requests.exceptions.ConnectionError`: `requests`
        # is not imported in the visible part of this file, so evaluating
        # that name would raise NameError as soon as any exception occurred.
        # requests' exceptions derive from IOError (an alias of OSError), so
        # connection failures are still caught.
        except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
                exceptions.ServerException, exceptions.RequestError, OSError) as e:
            logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({attempt}/{max_retries})")
        else:
            logging.debug(f"Translated phrase: {translation}")
            return translation

    logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
    return "[Translation Error]"
179
+
180
def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int]]:
    """Look up phrases by Gematria value and keep those that are short enough.

    All rows of ``results`` with the given ``gematria_sum`` are fetched; the
    word-count limit is then applied in Python.

    Args:
        gematria_sum: The Gematria value to search for.
        max_words: Upper bound (inclusive) on the number of
            whitespace-separated words in a returned phrase.

    Returns:
        ``(words, book, chapter, verse)`` tuples for the matching phrases.
    """
    global conn
    db_cursor = conn.cursor()
    logging.debug(f"Searching for phrases with Gematria: {gematria_sum} and max words: {max_words}")
    db_cursor.execute('''
        SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
    ''', (gematria_sum,))
    candidates = db_cursor.fetchall()
    logging.debug(f"Found {len(candidates)} matching phrases before filtering.")

    def _within_limit(words: str) -> bool:
        # Counts whitespace-separated words and logs the count for each row.
        word_count = len(words.split())
        logging.debug(f"Word count for '{words}': {word_count}")
        return word_count <= max_words

    kept = [
        (words, book, chapter, verse)
        for words, book, chapter, verse in candidates
        if _within_limit(words)
    ]
    logging.debug(f"Found {len(kept)} matching phrases after filtering.")
    return kept
200
+
201
def gematria_search_interface(phrase: str, max_words: int, show_translation: bool) -> str:
    """Handle one search request from the Gradio interface.

    The Gematria value of the input is the value of its non-digit text plus
    the sum of every run of digits it contains, so ``'abc 111 777'``
    searches for ``gematria('abc') + 888``.

    Args:
        phrase: Text and/or numbers entered by the user.
        max_words: Maximum number of words a result phrase may contain.
        show_translation: Whether to look up an English translation for
            every result.

    Returns:
        An HTML fragment (style block plus result cards grouped by book), or
        a plain message when the input is blank or nothing matches.
    """
    if not phrase.strip():
        return "Please enter a phrase."

    global conn, book_names, gematria_cache
    # A fresh connection is opened per request and stored in the global so
    # that search_gematria_in_db() and get_translation() use it.
    conn = sqlite3.connect(DATABASE_FILE)
    try:
        # Extract numbers from the input text
        numbers = re.findall(r'\d+', phrase)
        # Calculate Gematria for the remaining text (non-numbers)
        text_without_numbers = re.sub(r'\d+', '', phrase)
        phrase_gematria = calculate_gematria(text_without_numbers.replace(" ", ""))
        # Add sum of numbers to Gematria
        phrase_gematria += sum(int(number) for number in numbers)

        logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
        logging.debug(f"Phrase Gematria: {phrase_gematria}")
        logging.debug(f"Max Words: {max_words}")

        # Results are cached per (value, max_words) pair.
        cache_key = (phrase_gematria, max_words)
        if cache_key in gematria_cache:
            matching_phrases = gematria_cache[cache_key]
            logging.debug(f"Retrieved matching phrases from cache for max_words: {max_words}.")
        else:
            matching_phrases = search_gematria_in_db(phrase_gematria, max_words)
            gematria_cache[cache_key] = matching_phrases
            logging.debug(f"Retrieved matching phrases from database for max_words: {max_words}.")

        if not matching_phrases:
            return "No matching phrases found."

        # Title -> book id, built once instead of scanning book_names for
        # every row. setdefault keeps the first id for a repeated title.
        book_order = {}
        for book_id, title in book_names.items():
            book_order.setdefault(title, int(book_id))
        # Books missing from book_names sort last instead of raising.
        unknown_book = float('inf')

        # Sort results by book, chapter, and verse
        sorted_phrases = sorted(
            matching_phrases,
            key=lambda row: (book_order.get(row[1], unknown_book), row[2], row[3]),
        )
        logging.debug(f"Sorted matching phrases: {sorted_phrases}")

        # Group results by book
        results_by_book = defaultdict(list)
        for words, book, chapter, verse in sorted_phrases:
            results_by_book[book].append((words, chapter, verse))
        logging.debug(f"Grouped results by book: {results_by_book}")

        # Format results for display. Text from the database and the
        # translator is escaped before being placed into the markup.
        results = ["<div class='results-container'>"]
        for book, phrases in results_by_book.items():
            results.append(f"<h4>Book: {html.escape(book)}</h4>")
            for words, chapter, verse in phrases:
                translation = (get_translation(words) or "") if show_translation else ""
                link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
                results.append(f"""
                <div class='result-item'>
                    <p>Chapter: {chapter}, Verse: {verse}</p>
                    <p class='hebrew-phrase'>Hebrew Phrase: {html.escape(words)}</p>
                    <p>Translation: {html.escape(translation)}</p>
                    <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
                </div>
                """)
        results.append("</div>")  # Close results-container div
    finally:
        # Runs on every exit path, including the early "no match" return
        # and exceptions, so the connection is never leaked.
        conn.close()

    # Add CSS styling
    style = """
    <style>
        .results-container {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
            gap: 20px;
        }

        .result-item {
            border: 1px solid #ccc;
            padding: 15px;
            border-radius: 5px;
            box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
        }

        .hebrew-phrase {
            font-family: 'SBL Hebrew', 'Ezra SIL', serif;
            direction: rtl;
        }

        .bible-link {
            display: block;
            margin-top: 10px;
            color: #007bff;
            text-decoration: none;
        }
    </style>
    """

    return style + "\n".join(results)
300
+
301
def flatten_text(text: List) -> str:
    """Join a (possibly nested) list of strings into one space-separated string.

    Nested lists are flattened recursively. A value that is not a list is
    returned unchanged.
    """
    if not isinstance(text, list):
        return text

    pieces = []
    for item in text:
        if isinstance(item, list):
            pieces.append(flatten_text(item))
        else:
            pieces.append(item)
    return " ".join(pieces)
306
+
307
def run_app() -> None:
    """Initialize the database and translator, populate the database, and launch the Gradio app."""
    initialize_database()
    initialize_translator()

    # Pre-populate the database.
    logging.info("Starting database population...")
    phrases_to_insert = []  # Collect phrases before inserting in bulk

    # populate_database() already yields every phrase length from 1 up to
    # max_phrase_length, so one pass with MAX_PHRASE_LENGTH is enough. Calling
    # it once per length (1, 2, ..., MAX_PHRASE_LENGTH) regenerated the
    # shorter phrases on every pass only for INSERT OR IGNORE to drop them.
    phrase_stream = populate_database(1, 39, max_phrase_length=MAX_PHRASE_LENGTH)  # Books 1 to 39
    for row in tqdm(phrase_stream, desc=f"Populating Database (Max Length: {MAX_PHRASE_LENGTH})"):
        phrases_to_insert.append(row)
        if len(phrases_to_insert) >= BATCH_SIZE:  # Insert in batches of BATCH_SIZE for efficiency
            insert_phrases_to_db(phrases_to_insert)
            phrases_to_insert = []
    if phrases_to_insert:  # Insert remaining phrases
        insert_phrases_to_db(phrases_to_insert)
    logging.info("Database population complete.")

    iface = gr.Interface(
        fn=gematria_search_interface,
        inputs=[
            gr.Textbox(label="Enter word(s) or numbers (e.g., 'abc', '888' or 'abc 111 777')"),
            # NOTE(review): the database only holds phrases of up to
            # MAX_PHRASE_LENGTH words, so values above that find nothing extra.
            gr.Number(label="Max Word Count in Result Phrases", value=1, minimum=1, maximum=10),
            gr.Checkbox(label="Show Translation", value=True),
        ],
        outputs=gr.HTML(label="Results"),
        title="Gematria Search in Tanach",
        description="Search for phrases and/or numbers in the Tanach that have the same Gematria value.",
        live=False,
        allow_flagging="never",
    )
    iface.launch()
339
 
340
  if __name__ == "__main__":
341
+ run_app()