bartman081523 committed on
Commit
c1f45eb
1 Parent(s): e3a23f6

populate db with 3-word phrases

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +68 -58
  3. gematria.db +2 -2
  4. gematria.db-journal +3 -0
.gitattributes CHANGED
@@ -1 +1,2 @@
1
  gematria.db filter=lfs diff=lfs merge=lfs -text
 
 
1
  gematria.db filter=lfs diff=lfs merge=lfs -text
2
+ gematria.db-journal filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -8,19 +8,23 @@ from util import process_json_files
8
  from gematria import calculate_gematria
9
  from deep_translator import GoogleTranslator, exceptions
10
  from urllib.parse import quote_plus
 
11
 
12
  # Set up logging
13
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
  # Global variables for database connection, translator, and book names
16
  conn = None
17
  translator = None
18
  book_names = {}
19
 
 
 
 
20
  def initialize_database():
21
  """Initializes the SQLite database."""
22
  global conn
23
- conn = sqlite3.connect('gematria.db')
24
  cursor = conn.cursor()
25
 
26
  # Create tables if they don't exist
@@ -29,20 +33,18 @@ def initialize_database():
29
  gematria_sum INTEGER,
30
  words TEXT,
31
  translation TEXT,
32
- book TEXT, -- Store book name directly
33
  chapter INTEGER,
34
  verse INTEGER,
35
- PRIMARY KEY (gematria_sum, book, chapter, verse)
36
  )
37
  ''')
38
  cursor.execute('''
39
  CREATE TABLE IF NOT EXISTS processed_books (
40
- book TEXT PRIMARY KEY, -- Store book name directly
41
  max_phrase_length INTEGER
42
  )
43
  ''')
44
- conn.commit()
45
- logging.info("Database initialized.")
46
 
47
  def initialize_translator():
48
  """Initializes the Google Translator."""
@@ -56,7 +58,7 @@ def populate_database(start_book, end_book, max_phrase_length=1):
56
  logging.info(f"Populating database with books from {start_book} to {end_book}...")
57
  cursor = conn.cursor()
58
 
59
- for book_id in range(start_book, end_book + 1):
60
  book_data = process_json_files(book_id, book_id) # Get data for the single book
61
 
62
  # process_json_files returns a dictionary with book_id as key,
@@ -84,43 +86,36 @@ def populate_database(start_book, end_book, max_phrase_length=1):
84
  continue
85
 
86
  chapters = book_data['text']
 
87
  for chapter_id, chapter in enumerate(chapters):
88
- if not isinstance(chapter, list):
89
- logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
90
- continue
91
  for verse_id, verse in enumerate(chapter):
92
  verse_text = flatten_text(verse)
93
- # Remove text in square brackets
94
  verse_text = re.sub(r'\[.*?\]', '', verse_text)
95
  verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
96
  verse_text = re.sub(r" +", " ", verse_text)
97
  words = verse_text.split()
98
 
99
- # Iterate through phrases of different lengths
100
  for length in range(1, max_phrase_length + 1):
101
  for start in range(len(words) - length + 1):
102
  phrase_candidate = " ".join(words[start:start + length])
103
  gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
104
- insert_phrase_to_db(gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1)
105
 
106
- # Mark the book as processed for this max_phrase_length
107
- cursor.execute('''INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (title, max_phrase_length))
108
- conn.commit()
109
- logging.info("Database population complete.")
110
-
111
- def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
112
- """Inserts a phrase and its Gematria value into the database."""
113
  global conn
114
  cursor = conn.cursor()
115
- try:
116
- cursor.execute('''
117
- INSERT INTO results (gematria_sum, words, book, chapter, verse)
118
- VALUES (?, ?, ?, ?, ?)
119
- ''', (gematria_sum, phrase_candidate, book, chapter, verse))
120
- conn.commit()
121
- logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
122
- except sqlite3.IntegrityError:
123
- logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
124
 
125
  def get_translation(phrase):
126
  """Retrieves or generates the English translation of a Hebrew phrase."""
@@ -136,11 +131,10 @@ def get_translation(phrase):
136
  else:
137
  translation = translate_and_store(phrase)
138
  cursor.execute('''
139
- UPDATE results
140
- SET translation = ?
141
- WHERE words = ?
142
  ''', (translation, phrase))
143
- conn.commit()
144
  return translation
145
 
146
  def translate_and_store(phrase):
@@ -178,14 +172,22 @@ def gematria_search_interface(phrase):
178
  if not phrase.strip():
179
  return "Please enter a phrase."
180
 
181
- global conn, book_names
182
  conn = sqlite3.connect('gematria.db')
183
  cursor = conn.cursor()
184
 
185
  phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
186
  logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
187
 
188
- matching_phrases = search_gematria_in_db(phrase_gematria)
 
 
 
 
 
 
 
 
189
  if not matching_phrases:
190
  return "No matching phrases found."
191
 
@@ -206,13 +208,13 @@ def gematria_search_interface(phrase):
206
  translation = get_translation(words)
207
  link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
208
  results.append(f"""
209
- <div class='result-item'>
210
- <p>Chapter: {chapter}, Verse: {verse}</p>
211
- <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
212
- <p>Translation: {translation}</p>
213
- <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
214
- </div>
215
- """)
216
  results.append("</div>") # Close results-container div
217
 
218
  conn.close()
@@ -221,28 +223,28 @@ def gematria_search_interface(phrase):
221
  style = """
222
  <style>
223
  .results-container {
224
- display: grid;
225
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
226
- gap: 20px;
227
  }
228
 
229
  .result-item {
230
- border: 1px solid #ccc;
231
- padding: 15px;
232
- border-radius: 5px;
233
- box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
234
  }
235
 
236
  .hebrew-phrase {
237
- font-family: 'SBL Hebrew', 'Ezra SIL', serif;
238
- direction: rtl;
239
  }
240
 
241
  .bible-link {
242
- display: block;
243
- margin-top: 10px;
244
- color: #007bff;
245
- text-decoration: none;
246
  }
247
  </style>
248
  """
@@ -261,8 +263,16 @@ def run_app():
261
  initialize_translator()
262
 
263
  # Pre-populate the database
264
- populate_database(1, 39, max_phrase_length=12) # Books 1 to 39 (adjust as needed)
265
- #opulate_database(27, 27, max_phrase_length=31) # Book 27 (Psalms) - adjust as needed
 
 
 
 
 
 
 
 
266
 
267
  iface = gr.Interface(
268
  fn=gematria_search_interface,
 
8
  from gematria import calculate_gematria
9
  from deep_translator import GoogleTranslator, exceptions
10
  from urllib.parse import quote_plus
11
+ from tqdm import tqdm # Import tqdm for progress bars
12
 
13
  # Set up logging
14
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(filename)s - %(lineno)d - %(message)s')
15
 
16
  # Global variables for database connection, translator, and book names
17
  conn = None
18
  translator = None
19
  book_names = {}
20
 
21
+ # Cache of search results keyed by Gematria value, filled lazily to speed up repeated searches
22
+ gematria_cache = {}
23
+
24
  def initialize_database():
25
  """Initializes the SQLite database."""
26
  global conn
27
+ conn = sqlite3.connect('gematria.db', isolation_level=None) # Autocommit for faster insertion
28
  cursor = conn.cursor()
29
 
30
  # Create tables if they don't exist
 
33
  gematria_sum INTEGER,
34
  words TEXT,
35
  translation TEXT,
36
+ book TEXT,
37
  chapter INTEGER,
38
  verse INTEGER,
39
+ PRIMARY KEY (gematria_sum, words, book, chapter, verse)
40
  )
41
  ''')
42
  cursor.execute('''
43
  CREATE TABLE IF NOT EXISTS processed_books (
44
+ book TEXT PRIMARY KEY,
45
  max_phrase_length INTEGER
46
  )
47
  ''')
 
 
48
 
49
  def initialize_translator():
50
  """Initializes the Google Translator."""
 
58
  logging.info(f"Populating database with books from {start_book} to {end_book}...")
59
  cursor = conn.cursor()
60
 
61
+ for book_id in tqdm(range(start_book, end_book + 1), desc="Processing Books"):
62
  book_data = process_json_files(book_id, book_id) # Get data for the single book
63
 
64
  # process_json_files returns a dictionary with book_id as key,
 
86
  continue
87
 
88
  chapters = book_data['text']
89
+ # Walk chapters and verses with enumerate to get their indices
90
  for chapter_id, chapter in enumerate(chapters):
 
 
 
91
  for verse_id, verse in enumerate(chapter):
92
  verse_text = flatten_text(verse)
93
+ # Remove text in square brackets and non-Hebrew characters
94
  verse_text = re.sub(r'\[.*?\]', '', verse_text)
95
  verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
96
  verse_text = re.sub(r" +", " ", verse_text)
97
  words = verse_text.split()
98
 
99
+ # Use a generator to avoid building large lists in memory
100
  for length in range(1, max_phrase_length + 1):
101
  for start in range(len(words) - length + 1):
102
  phrase_candidate = " ".join(words[start:start + length])
103
  gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
104
+ yield gematria_sum, phrase_candidate, title, chapter_id + 1, verse_id + 1
105
 
106
+ def insert_phrases_to_db(phrases):
107
+ """Inserts a list of phrases into the database efficiently."""
 
 
 
 
 
108
  global conn
109
  cursor = conn.cursor()
110
+
111
+ # Use executemany to insert multiple rows at once
112
+ cursor.executemany('''
113
+ INSERT OR IGNORE INTO results (gematria_sum, words, book, chapter, verse)
114
+ VALUES (?, ?, ?, ?, ?)
115
+ ''', phrases)
116
+
117
+ # Commit once after the whole batch has been inserted
118
+ conn.commit()
119
 
120
  def get_translation(phrase):
121
  """Retrieves or generates the English translation of a Hebrew phrase."""
 
131
  else:
132
  translation = translate_and_store(phrase)
133
  cursor.execute('''
134
+ UPDATE results
135
+ SET translation = ?
136
+ WHERE words = ?
137
  ''', (translation, phrase))
 
138
  return translation
139
 
140
  def translate_and_store(phrase):
 
172
  if not phrase.strip():
173
  return "Please enter a phrase."
174
 
175
+ global conn, book_names, gematria_cache
176
  conn = sqlite3.connect('gematria.db')
177
  cursor = conn.cursor()
178
 
179
  phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
180
  logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
181
 
182
+ # Check if Gematria is in cache
183
+ if phrase_gematria in gematria_cache:
184
+ matching_phrases = gematria_cache[phrase_gematria]
185
+ else:
186
+ # Search in the database
187
+ matching_phrases = search_gematria_in_db(phrase_gematria)
188
+ # Cache the results for future searches
189
+ gematria_cache[phrase_gematria] = matching_phrases
190
+
191
  if not matching_phrases:
192
  return "No matching phrases found."
193
 
 
208
  translation = get_translation(words)
209
  link = f"https://www.biblegateway.com/passage/?search={quote_plus(book)}+{chapter}%3A{verse}&version=CJB"
210
  results.append(f"""
211
+ <div class='result-item'>
212
+ <p>Chapter: {chapter}, Verse: {verse}</p>
213
+ <p class='hebrew-phrase'>Hebrew Phrase: {words}</p>
214
+ <p>Translation: {translation}</p>
215
+ <a href='{link}' target='_blank' class='bible-link'>[See on Bible Gateway]</a>
216
+ </div>
217
+ """)
218
  results.append("</div>") # Close results-container div
219
 
220
  conn.close()
 
223
  style = """
224
  <style>
225
  .results-container {
226
+ display: grid;
227
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
228
+ gap: 20px;
229
  }
230
 
231
  .result-item {
232
+ border: 1px solid #ccc;
233
+ padding: 15px;
234
+ border-radius: 5px;
235
+ box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1);
236
  }
237
 
238
  .hebrew-phrase {
239
+ font-family: 'SBL Hebrew', 'Ezra SIL', serif;
240
+ direction: rtl;
241
  }
242
 
243
  .bible-link {
244
+ display: block;
245
+ margin-top: 10px;
246
+ color: #007bff;
247
+ text-decoration: none;
248
  }
249
  </style>
250
  """
 
263
  initialize_translator()
264
 
265
  # Pre-populate the database
266
+ logging.info("Starting database population...")
267
+ phrases_to_insert = [] # Collect phrases before inserting in bulk
268
+ for gematria_sum, phrase, book, chapter, verse in tqdm(populate_database(1, 39, max_phrase_length=3), desc="Populating Database"): # Books 1 to 39
269
+ phrases_to_insert.append((gematria_sum, phrase, book, chapter, verse))
270
+ if len(phrases_to_insert) >= 1000: # Insert in batches of 1000 for efficiency
271
+ insert_phrases_to_db(phrases_to_insert)
272
+ phrases_to_insert = []
273
+ if phrases_to_insert: # Insert remaining phrases
274
+ insert_phrases_to_db(phrases_to_insert)
275
+ logging.info("Database population complete.")
276
 
277
  iface = gr.Interface(
278
  fn=gematria_search_interface,
gematria.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9d0db3cd97140e47b091845fc11daaa261164db0ac4aa8e430e0c445da27851
3
- size 171667456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ae62432a24f36453d7080be5d95162d949ac77f006db0a0a81000c40abcabf
3
+ size 65466368
gematria.db-journal ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2ff707a32b432b7883756f3d0db45fc3031ffefa7f890197ac1d9bed4fb9849
3
+ size 3884488