neuralworm committed on
Commit
a8a8bc2
·
1 Parent(s): d7a708b

quit async experiment

Browse files
Files changed (1) hide show
  1. app.py +137 -143
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
  import json
3
  import re
4
- import sqlite3
5
- import logging
6
  import asyncio
 
 
7
  from collections import defaultdict
8
  from util import process_json_files
9
  from gematria import calculate_gematria
@@ -13,13 +13,11 @@ from urllib.parse import quote_plus
13
  # Set up logging
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
16
- # Global variables for database connection and translator
17
- conn = None
18
  translator = None
19
  book_names = {} # Dictionary to store book names
20
-
21
- # Global variable to track ongoing search tasks
22
- ongoing_search_task = None
23
 
24
  def flatten_text(text):
25
  """Helper function to flatten nested lists into a single list."""
@@ -27,29 +25,30 @@ def flatten_text(text):
27
  return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
28
  return text
29
 
30
- def initialize_database():
31
- """Initializes the SQLite database."""
32
  global conn
33
- conn = sqlite3.connect('gematria.db')
34
- c = conn.cursor()
35
- c.execute('''
36
- CREATE TABLE IF NOT EXISTS results (
37
- gematria_sum INTEGER,
38
- words TEXT UNIQUE,
39
- translation TEXT,
40
- book INTEGER,
41
- chapter INTEGER,
42
- verse INTEGER,
43
- PRIMARY KEY (words, book, chapter, verse)
44
- )
45
- ''')
46
- c.execute('''
47
- CREATE TABLE IF NOT EXISTS processed_books (
48
- book INTEGER PRIMARY KEY,
49
- max_phrase_length INTEGER
50
- )
51
- ''')
52
- conn.commit()
 
53
  logging.info("Database initialized.")
54
 
55
  def initialize_translator():
@@ -58,93 +57,89 @@ def initialize_translator():
58
  translator = GoogleTranslator(source='iw', target='en')
59
  logging.info("Translator initialized.")
60
 
61
- def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
62
  """Inserts a phrase and its Gematria value into the database."""
63
- global conn
64
- c = conn.cursor()
65
- try:
66
- c.execute('''
67
- INSERT INTO results (gematria_sum, words, book, chapter, verse)
68
- VALUES (?, ?, ?, ?, ?)
69
- ''', (gematria_sum, phrase_candidate, book, chapter, verse))
70
- conn.commit()
71
- logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
72
- except sqlite3.IntegrityError:
73
- logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
74
-
75
- async def populate_database_async(tanach_texts, max_phrase_length=1):
76
- """Asynchronous version of populate_database for concurrent execution."""
77
- # Database operations and logging are not thread-safe, so we run them in the main thread
78
- await asyncio.to_thread(populate_database, tanach_texts, max_phrase_length)
79
-
80
- def populate_database(tanach_texts, max_phrase_length=1):
81
- """Populates the database with phrases from the Tanach and their Gematria values."""
82
- global conn, book_names
83
  logging.info("Populating database...")
84
- c = conn.cursor()
85
-
86
- for book_id, text in tanach_texts: # Unpack the tuple (book_id, text)
87
- c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
88
- result = c.fetchone()
89
- if result and result[0] >= max_phrase_length:
90
- logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
91
- continue
92
-
93
- logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
94
- if 'text' not in text or not isinstance(text['text'], list):
95
- logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
96
- continue
97
-
98
- title = text.get('title', 'Unknown')
99
- book_names[book_id] = title # Store book name
100
-
101
- chapters = text['text']
102
- for chapter_id, chapter in enumerate(chapters):
103
- if not isinstance(chapter, list):
104
- logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
105
  continue
106
- for verse_id, verse in enumerate(chapter):
107
- verse_text = flatten_text(verse)
108
- verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
109
- verse_text = re.sub(r" +", " ", verse_text)
110
- words = verse_text.split()
111
- for length in range(1, max_phrase_length + 1):
112
- for start in range(len(words) - length + 1):
113
- phrase_candidate = " ".join(words[start:start + length])
114
- gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
115
- insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
116
- try:
117
- c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
118
- except sqlite3.IntegrityError:
119
- c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id))
120
- conn.commit()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  logging.info("Database population complete.")
122
 
123
- def get_translation(phrase):
124
  """Retrieves or generates the English translation of a Hebrew phrase."""
125
  global translator, conn
126
- c = conn.cursor()
127
- c.execute('''
128
- SELECT translation FROM results
129
- WHERE words = ?
130
- ''', (phrase,))
131
- result = c.fetchone()
132
- if result and result[0]:
133
- return result[0]
134
- else:
135
- translation = translate_and_store(phrase)
136
- c.execute('''
137
- UPDATE results
138
- SET translation = ?
139
  WHERE words = ?
140
- ''', (translation, phrase))
141
- conn.commit()
142
- return translation
143
-
 
 
 
 
 
 
 
 
 
144
 
145
  def translate_and_store(phrase):
146
  global translator
147
- max_retries = 3 # You can adjust the number of retries
148
  retries = 0
149
 
150
  while retries < max_retries:
@@ -153,25 +148,25 @@ def translate_and_store(phrase):
153
  logging.debug(f"Translated phrase: {translation}")
154
  return translation
155
  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
156
- exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e: # Add ConnectionError
157
  retries += 1
158
  logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
159
 
160
  logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
161
- return "[Translation Error]"
162
 
163
- def search_gematria_in_db(gematria_sum):
164
  """Searches the database for phrases with a given Gematria value."""
165
- global conn
166
- c = conn.cursor()
167
- c.execute('''
168
- SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
169
- ''', (gematria_sum,))
170
- results = c.fetchall()
171
- logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
172
- return results
173
-
174
- def gematria_search_interface(phrase):
175
  """The main function for the Gradio interface."""
176
  global ongoing_search_task, conn, book_names
177
 
@@ -182,16 +177,9 @@ def gematria_search_interface(phrase):
182
  if ongoing_search_task is not None and not ongoing_search_task.done():
183
  ongoing_search_task.cancel()
184
 
185
- # Create database connection inside the function
186
- conn = sqlite3.connect('gematria.db')
187
- c = conn.cursor()
188
-
189
- phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
190
- logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
191
-
192
  # Start the search asynchronously
193
  async def search_task():
194
- matching_phrases = search_gematria_in_db(phrase_gematria)
195
 
196
  if not matching_phrases:
197
  return "No matching phrases found."
@@ -208,7 +196,7 @@ def gematria_search_interface(phrase):
208
  for book, phrases in results_by_book.items():
209
  results.append(f"<h4>Book: {book_names.get(book, 'Unknown')}</h4>")
210
  for words, chapter, verse in phrases:
211
- translation = get_translation(words)
212
  book_name_english = book_names.get(book, 'Unknown')
213
  link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"
214
 
@@ -254,25 +242,29 @@ def gematria_search_interface(phrase):
254
 
255
  return style + "\n".join(results)
256
 
257
- ongoing_search_task = asyncio.create_task(search_task())
258
- return await ongoing_search_task
 
259
 
260
- def run_app():
261
- """Initializes and launches the Gradio app."""
262
- initialize_database()
 
263
  initialize_translator()
264
 
265
- # Start database population in the background
266
- tanach_texts = process_json_files(1, 39) # Process all books
267
- asyncio.create_task(populate_database_async(tanach_texts, max_phrase_length=1))
268
-
269
- tanach_texts = process_json_files(1, 1) # Process all books
270
- asyncio.create_task(populate_database_async(tanach_texts, max_phrase_length=4))
271
-
272
- tanach_texts = process_json_files(27, 27) # Process all books
273
- asyncio.create_task(populate_database_async(tanach_texts, max_phrase_length=4))
274
 
 
 
275
 
 
276
  iface = gr.Interface(
277
  fn=gematria_search_interface,
278
  inputs=gr.Textbox(label="Enter phrase"),
@@ -280,9 +272,11 @@ def run_app():
280
  title="Gematria Search in Tanach",
281
  description="Search for phrases in the Tanach that have the same Gematria value.",
282
  live=False,
283
- allow_flagging="never"
 
284
  )
285
- iface.launch()
 
286
 
287
  if __name__ == "__main__":
288
  asyncio.run(run_app())
 
1
  import gradio as gr
2
  import json
3
  import re
 
 
4
  import asyncio
5
+ import aiosqlite
6
+ import logging
7
  from collections import defaultdict
8
  from util import process_json_files
9
  from gematria import calculate_gematria
 
13
  # Set up logging
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
16
+ # Global variables
17
+ conn = None # Database connection (will be initialized asynchronously)
18
  translator = None
19
  book_names = {} # Dictionary to store book names
20
+ ongoing_search_task = None # Track ongoing search tasks
 
 
21
 
22
  def flatten_text(text):
23
  """Helper function to flatten nested lists into a single list."""
 
25
  return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
26
  return text
27
 
28
async def initialize_database():
    """Open the global SQLite connection and create the schema if missing.

    Connects to ``gematria.db`` through aiosqlite, stores the connection in
    the module-level ``conn`` and creates the ``results`` and
    ``processed_books`` tables when they do not exist yet.

    The connection is deliberately NOT used as ``async with conn``: leaving
    that context manager closes the aiosqlite connection, which would make
    the global ``conn`` unusable for every later query.
    """
    global conn
    conn = await aiosqlite.connect('gematria.db')

    # NOTE(review): `words` is declared UNIQUE on its own, so a phrase that
    # occurs at several locations is stored only once (later inserts raise
    # IntegrityError). Confirm this is intended before relaxing the schema.
    await conn.execute('''
        CREATE TABLE IF NOT EXISTS results (
            gematria_sum INTEGER,
            words TEXT UNIQUE,
            translation TEXT,
            book INTEGER,
            chapter INTEGER,
            verse INTEGER,
            PRIMARY KEY (words, book, chapter, verse)
        )
    ''')
    await conn.execute('''
        CREATE TABLE IF NOT EXISTS processed_books (
            book INTEGER PRIMARY KEY,
            max_phrase_length INTEGER
        )
    ''')
    await conn.commit()
    logging.info("Database initialized.")
53
 
54
  def initialize_translator():
 
57
  translator = GoogleTranslator(source='iw', target='en')
58
  logging.info("Translator initialized.")
59
 
60
async def insert_phrase_to_db(conn, gematria_sum, phrase_candidate, book, chapter, verse):
    """Insert one phrase and its Gematria value into the ``results`` table.

    Args:
        conn: Open aiosqlite connection; it is left open for the caller.
        gematria_sum: Gematria value of ``phrase_candidate``.
        phrase_candidate: The phrase text to store in the ``words`` column.
        book: Book identifier of the phrase location.
        chapter: Chapter number of the phrase location.
        verse: Verse number of the phrase location.

    A row that violates a uniqueness constraint is skipped and only logged
    at debug level.
    """
    # Do not wrap this in `async with conn`: exiting that context manager
    # closes the shared connection after the very first insert.
    try:
        await conn.execute(
            '''
            INSERT INTO results (gematria_sum, words, book, chapter, verse)
            VALUES (?, ?, ?, ?, ?)
            ''',
            (gematria_sum, phrase_candidate, book, chapter, verse),
        )
        await conn.commit()
        logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
    except aiosqlite.IntegrityError:
        logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
73
+
74
async def populate_database_async(conn, tanach_texts, max_phrase_length=1):
    """Fill the database with phrases from the given texts and their Gematria.

    Args:
        conn: Open aiosqlite connection; it is left open for the caller.
        tanach_texts: Iterable of ``(book_id, text)`` pairs, where ``text`` is
            a mapping with a ``'text'`` list of chapters (each a list of
            verses) and an optional ``'title'``.
        max_phrase_length: Longest phrase, in words, to generate per verse.

    Books already recorded in ``processed_books`` with an equal or larger
    ``max_phrase_length`` are skipped. Each processed book's title is stored
    in the module-level ``book_names`` mapping.
    """
    global book_names
    logging.info("Populating database...")

    # No `async with conn` here: leaving that context manager would close
    # the shared connection once population finishes.
    for book_id, text in tanach_texts:
        cursor = await conn.execute(
            'SELECT max_phrase_length FROM processed_books WHERE book = ?',
            (book_id,),
        )
        result = await cursor.fetchone()
        await cursor.close()
        if result and result[0] >= max_phrase_length:
            logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
            continue

        logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
        if 'text' not in text or not isinstance(text['text'], list):
            logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
            continue

        title = text.get('title', 'Unknown')
        book_names[book_id] = title

        for chapter_id, chapter in enumerate(text['text']):
            if not isinstance(chapter, list):
                logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
                continue
            for verse_id, verse in enumerate(chapter):
                # Keep only Hebrew letters and spaces, then collapse runs of spaces.
                verse_text = flatten_text(verse)
                verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
                verse_text = re.sub(r" +", " ", verse_text)
                words = verse_text.split()
                # Every contiguous run of 1..max_phrase_length words is a candidate.
                for length in range(1, max_phrase_length + 1):
                    for start in range(len(words) - length + 1):
                        phrase_candidate = " ".join(words[start:start + length])
                        gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
                        await insert_phrase_to_db(
                            conn, gematria_sum, phrase_candidate,
                            book_id, chapter_id + 1, verse_id + 1,
                        )

        # Upsert replaces the former INSERT / catch IntegrityError / UPDATE dance.
        await conn.execute(
            'INSERT OR REPLACE INTO processed_books (book, max_phrase_length) VALUES (?, ?)',
            (book_id, max_phrase_length),
        )
        await conn.commit()

    logging.info("Database population complete.")
117
 
118
async def get_translation(phrase):
    """Return the English translation of a Hebrew phrase, caching it in the DB.

    Looks up ``phrase`` in the ``results`` table through the module-level
    ``conn``. On a cache miss the phrase is translated with
    ``translate_and_store`` and the result is written back to the row.

    Args:
        phrase: The phrase as stored in the ``words`` column.

    Returns:
        The cached or freshly generated translation; the string
        ``"[Translation Error]"`` when translation failed.
    """
    failure_marker = "[Translation Error]"

    # No `async with conn` here: leaving that context manager would close
    # the shared global connection after the first lookup.
    cursor = await conn.execute(
        'SELECT translation FROM results WHERE words = ?',
        (phrase,),
    )
    result = await cursor.fetchone()
    await cursor.close()
    if result and result[0] and result[0] != failure_marker:
        return result[0]

    # translate_and_store is synchronous and may retry; run it in a worker
    # thread so the event loop is not blocked while it works.
    translation = await asyncio.to_thread(translate_and_store, phrase)

    # Do not persist the failure marker, otherwise a transient translation
    # error would be served from the cache forever.
    if translation != failure_marker:
        await conn.execute(
            'UPDATE results SET translation = ? WHERE words = ?',
            (translation, phrase),
        )
        await conn.commit()
    return translation
139
 
140
  def translate_and_store(phrase):
141
  global translator
142
+ max_retries = 3
143
  retries = 0
144
 
145
  while retries < max_retries:
 
148
  logging.debug(f"Translated phrase: {translation}")
149
  return translation
150
  except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
151
+ exceptions.ServerException, exceptions.RequestError, requests.exceptions.ConnectionError) as e:
152
  retries += 1
153
  logging.warning(f"Error translating phrase '{phrase}': {e}. Retrying... ({retries}/{max_retries})")
154
 
155
  logging.error(f"Failed to translate phrase '{phrase}' after {max_retries} retries.")
156
+ return "[Translation Error]"
157
 
158
async def search_gematria_in_db(conn, gematria_sum):
    """Fetch all stored phrases whose Gematria value equals ``gematria_sum``.

    Args:
        conn: Open aiosqlite connection; it is left open for the caller.
        gematria_sum: The Gematria value to look up.

    Returns:
        The rows returned by the query, each ``(words, book, chapter, verse)``.
    """
    # No `async with conn` here: leaving that context manager would close
    # the shared connection after a single search.
    cursor = await conn.execute(
        'SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?',
        (gematria_sum,),
    )
    results = await cursor.fetchall()
    await cursor.close()
    logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
    return results
168
+
169
+ async def gematria_search_interface(phrase, request: gr.Request):
170
  """The main function for the Gradio interface."""
171
  global ongoing_search_task, conn, book_names
172
 
 
177
  if ongoing_search_task is not None and not ongoing_search_task.done():
178
  ongoing_search_task.cancel()
179
 
 
 
 
 
 
 
 
180
  # Start the search asynchronously
181
  async def search_task():
182
+ matching_phrases = await search_gematria_in_db(conn, calculate_gematria(phrase.replace(" ", "")))
183
 
184
  if not matching_phrases:
185
  return "No matching phrases found."
 
196
  for book, phrases in results_by_book.items():
197
  results.append(f"<h4>Book: {book_names.get(book, 'Unknown')}</h4>")
198
  for words, chapter, verse in phrases:
199
+ translation = await get_translation(words) # Await the translation here
200
  book_name_english = book_names.get(book, 'Unknown')
201
  link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"
202
 
 
242
 
243
  return style + "\n".join(results)
244
 
245
+ ongoing_search_task = request.app.get_blocks().queue.insert(fn=search_task, queue_id="gematria")
246
+ result = request.app.get_blocks().queue.get_output(queue_id="gematria", job_hash=ongoing_search_task.job_hash)
247
+ return result
248
 
249
+ async def run_app():
250
+ """Initializes, populates the database, and launches the Gradio app."""
251
+ global conn
252
+ await initialize_database()
253
  initialize_translator()
254
 
255
+ # Move database population to a separate function
256
+ async def populate_database():
257
+ tanach_texts_1_1_1 = process_json_files(1, 1)
258
+ tanach_texts_1_39_1 = process_json_files(1, 39)
259
+ tanach_texts_27_27_4 = process_json_files(27, 27)
260
+ await populate_database_async(conn, tanach_texts_1_1_1, max_phrase_length=1)
261
+ await populate_database_async(conn, tanach_texts_1_39_1, max_phrase_length=1)
262
+ await populate_database_async(conn, tanach_texts_27_27_4, max_phrase_length=4)
 
263
 
264
+ # Start database population in the background
265
+ asyncio.create_task(populate_database())
266
 
267
+ # Create the main Gradio interface
268
  iface = gr.Interface(
269
  fn=gematria_search_interface,
270
  inputs=gr.Textbox(label="Enter phrase"),
 
272
  title="Gematria Search in Tanach",
273
  description="Search for phrases in the Tanach that have the same Gematria value.",
274
  live=False,
275
+ allow_flagging="never",
276
+ concurrency_limit=3
277
  )
278
+
279
+ iface.launch(max_threads=10)
280
 
281
  if __name__ == "__main__":
282
  asyncio.run(run_app())