neuralworm committed on
Commit
e8bf2aa
·
1 Parent(s): 6c0aa26

every result is now saved, no links

Browse files
Files changed (3) hide show
  1. app.py +169 -170
  2. gematria.db +2 -2
  3. util.py +6 -9
app.py CHANGED
@@ -7,206 +7,205 @@ from collections import defaultdict
7
  from util import process_json_files
8
  from gematria import calculate_gematria
9
  from deep_translator import GoogleTranslator, exceptions
 
10
 
11
  # Set up logging
12
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
 
14
  # Global variables for database connection and translator
15
  conn = None
16
  translator = None
 
17
 
18
  def flatten_text(text):
19
- """Helper function to flatten nested lists into a single list."""
20
- if isinstance(text, list):
21
- return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
22
- return text
23
 
24
  def initialize_database():
25
- """Initializes the SQLite database."""
26
- global conn
27
- conn = sqlite3.connect('gematria.db')
28
- c = conn.cursor()
29
- c.execute('''
30
  CREATE TABLE IF NOT EXISTS results (
31
- gematria_sum INTEGER,
32
- words TEXT UNIQUE,
33
- translation TEXT,
34
- occurrences TEXT,
35
- PRIMARY KEY (words)
 
 
36
  )
37
- ''')
38
- c.execute('''
39
  CREATE TABLE IF NOT EXISTS processed_books (
40
- book INTEGER PRIMARY KEY,
41
- max_phrase_length INTEGER
42
  )
43
- ''')
44
- conn.commit()
45
- logging.info("Database initialized.")
46
-
47
- def initialize_translator():
48
- """Initializes the Google Translator."""
49
- global translator
50
- translator = GoogleTranslator(source='iw', target='en')
51
- logging.info("Translator initialized.")
52
-
53
- def insert_phrase_to_db(gematria_sum, phrase_candidate, translation, occurrence):
54
- """Inserts a phrase and its Gematria value into the database."""
55
- global conn
56
- c = conn.cursor()
57
- try:
58
- c.execute('''
59
- INSERT INTO results (gematria_sum, words, translation, occurrences)
60
- VALUES (?, ?, ?, ?)
61
- ''', (gematria_sum, phrase_candidate, translation, occurrence))
62
- conn.commit()
63
- logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum})")
64
- except sqlite3.IntegrityError:
65
- logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum})")
66
- c.execute('''
67
- SELECT occurrences FROM results WHERE words = ?
68
- ''', (phrase_candidate,))
69
- existing_occurrences = c.fetchone()[0]
70
- updated_occurrences = existing_occurrences + ';' + occurrence
71
- c.execute('''
72
- UPDATE results
73
- SET occurrences = ?
74
- WHERE words = ?
75
- ''', (updated_occurrences, phrase_candidate))
76
  conn.commit()
 
77
 
78
- def populate_database(tanach_texts, max_phrase_length=3):
79
- """Populates the database with phrases from the Tanach and their Gematria values."""
80
- global conn
81
- logging.info("Populating database...")
82
- c = conn.cursor()
83
- for book_id, text in enumerate(tanach_texts):
84
- c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id + 1,))
85
- result = c.fetchone()
86
- if result and result[0] >= max_phrase_length:
87
- logging.info(f"Skipping book {book_id+1}: Already processed with max_phrase_length {result[0]}")
88
- continue
89
-
90
- logging.info(f"Processing book {book_id+1} with max_phrase_length {max_phrase_length}")
91
- if 'text' not in text or not isinstance(text['text'], list):
92
- logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
93
- continue
94
- title = text.get('title', 'Unknown')
95
- chapters = text['text']
96
- for chapter_id, chapter in enumerate(chapters):
97
- if not isinstance(chapter, list):
98
- logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
99
- continue
100
- for verse_id, verse in enumerate(chapter):
101
- verse_text = flatten_text(verse)
102
- verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
103
- verse_text = re.sub(r" +", " ", verse_text)
104
- words = verse_text.split()
105
- for length in range(1, max_phrase_length + 1):
106
- for start in range(len(words) - length + 1):
107
- phrase_candidate = " ".join(words[start:start + length])
108
- gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
109
- occurrence = f"{book_id+1}:{title}:{chapter_id+1}:{verse_id+1}"
110
- insert_phrase_to_db(gematria_sum, phrase_candidate, None, occurrence) # No translation initially
111
  try:
112
- c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id + 1, max_phrase_length))
 
 
 
 
 
113
  except sqlite3.IntegrityError:
114
- c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id + 1))
115
- conn.commit()
116
- logging.info("Database population complete.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def get_translation(phrase):
119
- """Retrieves or generates the English translation of a Hebrew phrase."""
120
- global translator, conn
121
- c = conn.cursor()
122
- c.execute('''
123
  SELECT translation FROM results
124
  WHERE words = ?
125
- ''', (phrase,))
126
- result = c.fetchone()
127
- if result and result[0]:
128
- return result[0]
129
- else:
130
- translation = translate_and_store(phrase)
131
- c.execute('''
132
- UPDATE results
133
- SET translation = ?
134
- WHERE words = ?
135
- ''', (translation, phrase))
136
- conn.commit()
137
- return translation
138
 
139
 
140
  def translate_and_store(phrase):
141
- """Translates a phrase using Google Translate."""
142
- global translator
143
- try:
144
- translation = translator.translate(phrase)
145
- logging.debug(f"Translated phrase: {translation}")
146
- return translation
147
- except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
148
- exceptions.ServerException, exceptions.RequestError) as e:
149
- logging.error(f"Error translating phrase '{phrase}': {e}")
150
- return "[Translation Error]"
151
 
152
  def search_gematria_in_db(gematria_sum):
153
- """Searches the database for phrases with a given Gematria value."""
154
- global conn
155
- c = conn.cursor()
156
- c.execute('''
157
- SELECT words, occurrences FROM results WHERE gematria_sum = ?
158
- ''', (gematria_sum,))
159
- results = c.fetchall()
160
- logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
161
- return results
162
 
163
  def gematria_search_interface(phrase):
164
- """The main function for the Gradio interface."""
165
- if not phrase.strip():
166
- return "Please enter a phrase."
167
-
168
- # Datenbankverbindung innerhalb der Funktion erstellen
169
- global conn
170
- conn = sqlite3.connect('gematria.db')
171
- c = conn.cursor()
172
-
173
- phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
174
- logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
175
-
176
- matching_phrases = search_gematria_in_db(phrase_gematria)
177
- if not matching_phrases:
178
- return "No matching phrases found."
179
-
180
- # Format results for display
181
- results = []
182
- for words, occurrences in matching_phrases:
183
- translation = get_translation(words)
184
- for occurrence in occurrences.split(';'):
185
- book, title, chapter, verse = occurrence.split(':')
186
- results.append(f"Book: {title} ({book})\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n\n")
187
-
188
- conn.close()
189
- return "\n".join(results)
 
190
 
191
  def run_app():
192
- """Initializes and launches the Gradio app."""
193
- initialize_database()
194
- initialize_translator()
195
-
196
- # Pre-populate the database
197
- tanach_texts = process_json_files(1, 39)
198
- populate_database(tanach_texts)
199
-
200
- iface = gr.Interface(
201
- fn=gematria_search_interface,
202
- inputs=gr.Textbox(label="Enter phrase"),
203
- outputs=gr.Textbox(label="Results"),
204
- title="Gematria Search in Tanach",
205
- description="Search for phrases in the Tanach that have the same Gematria value.",
206
- live=False,
207
- allow_flagging="never"
208
- )
209
- iface.launch()
 
 
210
 
211
  if __name__ == "__main__":
212
- run_app()
 
7
  from util import process_json_files
8
  from gematria import calculate_gematria
9
  from deep_translator import GoogleTranslator, exceptions
10
+ from urllib.parse import quote_plus
11
 
12
  # Set up logging
13
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
  # Global variables for database connection and translator
16
  conn = None
17
  translator = None
18
+ book_names = {} # Dictionary to store book names
19
 
20
  def flatten_text(text):
21
+ """Helper function to flatten nested lists into a single list."""
22
+ if isinstance(text, list):
23
+ return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
24
+ return text
25
 
26
  def initialize_database():
27
+ """Initializes the SQLite database."""
28
+ global conn
29
+ conn = sqlite3.connect('gematria.db')
30
+ c = conn.cursor()
31
+ c.execute('''
32
  CREATE TABLE IF NOT EXISTS results (
33
+ gematria_sum INTEGER,
34
+ words TEXT UNIQUE,
35
+ translation TEXT,
36
+ book INTEGER,
37
+ chapter INTEGER,
38
+ verse INTEGER,
39
+ PRIMARY KEY (words, book, chapter, verse)
40
  )
41
+ ''')
42
+ c.execute('''
43
  CREATE TABLE IF NOT EXISTS processed_books (
44
+ book INTEGER PRIMARY KEY,
45
+ max_phrase_length INTEGER
46
  )
47
+ ''')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  conn.commit()
49
+ logging.info("Database initialized.")
50
 
51
+ def initialize_translator():
52
+ """Initializes the Google Translator."""
53
+ global translator
54
+ translator = GoogleTranslator(source='iw', target='en')
55
+ logging.info("Translator initialized.")
56
+
57
+ def insert_phrase_to_db(gematria_sum, phrase_candidate, book, chapter, verse):
58
+ """Inserts a phrase and its Gematria value into the database."""
59
+ global conn
60
+ c = conn.cursor()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  try:
62
+ c.execute('''
63
+ INSERT INTO results (gematria_sum, words, book, chapter, verse)
64
+ VALUES (?, ?, ?, ?, ?)
65
+ ''', (gematria_sum, phrase_candidate, book, chapter, verse))
66
+ conn.commit()
67
+ logging.debug(f"Inserted phrase: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
68
  except sqlite3.IntegrityError:
69
+ logging.debug(f"Phrase already exists: {phrase_candidate} (Gematria: {gematria_sum}) at {book}:{chapter}:{verse}")
70
+
71
+ def populate_database(tanach_texts, max_phrase_length=3):
72
+ """Populates the database with phrases from the Tanach and their Gematria values."""
73
+ global conn, book_names
74
+ logging.info("Populating database...")
75
+ c = conn.cursor()
76
+
77
+ for book_id, text in tanach_texts: # Unpack the tuple (book_id, text)
78
+ c.execute('''SELECT max_phrase_length FROM processed_books WHERE book = ?''', (book_id,))
79
+ result = c.fetchone()
80
+ if result and result[0] >= max_phrase_length:
81
+ logging.info(f"Skipping book {book_id}: Already processed with max_phrase_length {result[0]}")
82
+ continue
83
+
84
+ logging.info(f"Processing book {book_id} with max_phrase_length {max_phrase_length}")
85
+ if 'text' not in text or not isinstance(text['text'], list):
86
+ logging.warning(f"Skipping book {book_id} due to missing or invalid 'text' field.")
87
+ continue
88
+
89
+ title = text.get('title', 'Unknown')
90
+ book_names[book_id] = title # Store book name
91
+
92
+ chapters = text['text']
93
+ for chapter_id, chapter in enumerate(chapters):
94
+ if not isinstance(chapter, list):
95
+ logging.warning(f"Skipping chapter {chapter_id} in book {title} due to invalid format.")
96
+ continue
97
+ for verse_id, verse in enumerate(chapter):
98
+ verse_text = flatten_text(verse)
99
+ verse_text = re.sub(r"[^\u05D0-\u05EA ]+", "", verse_text)
100
+ verse_text = re.sub(r" +", " ", verse_text)
101
+ words = verse_text.split()
102
+ for length in range(1, max_phrase_length + 1):
103
+ for start in range(len(words) - length + 1):
104
+ phrase_candidate = " ".join(words[start:start + length])
105
+ gematria_sum = calculate_gematria(phrase_candidate.replace(" ", ""))
106
+ insert_phrase_to_db(gematria_sum, phrase_candidate, book_id, chapter_id + 1, verse_id + 1)
107
+ try:
108
+ c.execute('''INSERT INTO processed_books (book, max_phrase_length) VALUES (?, ?)''', (book_id, max_phrase_length))
109
+ except sqlite3.IntegrityError:
110
+ c.execute('''UPDATE processed_books SET max_phrase_length = ? WHERE book = ?''', (max_phrase_length, book_id))
111
+ conn.commit()
112
+ logging.info("Database population complete.")
113
 
114
  def get_translation(phrase):
115
+ """Retrieves or generates the English translation of a Hebrew phrase."""
116
+ global translator, conn
117
+ c = conn.cursor()
118
+ c.execute('''
119
  SELECT translation FROM results
120
  WHERE words = ?
121
+ ''', (phrase,))
122
+ result = c.fetchone()
123
+ if result and result[0]:
124
+ return result[0]
125
+ else:
126
+ translation = translate_and_store(phrase)
127
+ c.execute('''
128
+ UPDATE results
129
+ SET translation = ?
130
+ WHERE words = ?
131
+ ''', (translation, phrase))
132
+ conn.commit()
133
+ return translation
134
 
135
 
136
  def translate_and_store(phrase):
137
+ """Translates a phrase using Google Translate."""
138
+ global translator
139
+ try:
140
+ translation = translator.translate(phrase)
141
+ logging.debug(f"Translated phrase: {translation}")
142
+ return translation
143
+ except (exceptions.TranslationNotFound, exceptions.NotValidPayload,
144
+ exceptions.ServerException, exceptions.RequestError) as e:
145
+ logging.error(f"Error translating phrase '{phrase}': {e}")
146
+ return "[Translation Error]"
147
 
148
  def search_gematria_in_db(gematria_sum):
149
+ """Searches the database for phrases with a given Gematria value."""
150
+ global conn
151
+ c = conn.cursor()
152
+ c.execute('''
153
+ SELECT words, book, chapter, verse FROM results WHERE gematria_sum = ?
154
+ ''', (gematria_sum,))
155
+ results = c.fetchall()
156
+ logging.debug(f"Found {len(results)} matching phrases for Gematria: {gematria_sum}")
157
+ return results
158
 
159
  def gematria_search_interface(phrase):
160
+ """The main function for the Gradio interface."""
161
+ if not phrase.strip():
162
+ return "Please enter a phrase."
163
+
164
+ # Create database connection inside the function
165
+ global conn, book_names
166
+ conn = sqlite3.connect('gematria.db')
167
+ c = conn.cursor()
168
+
169
+ phrase_gematria = calculate_gematria(phrase.replace(" ", ""))
170
+ logging.info(f"Searching for phrases with Gematria: {phrase_gematria}")
171
+
172
+ matching_phrases = search_gematria_in_db(phrase_gematria)
173
+ if not matching_phrases:
174
+ return "No matching phrases found."
175
+
176
+ # Format results for display
177
+ results = []
178
+ for words, book, chapter, verse in matching_phrases:
179
+ translation = get_translation(words)
180
+ book_name_english = book_names.get(book, 'Unknown') # Get book name
181
+ link = f"https://www.biblegateway.com/passage/?search={quote_plus(book_name_english)}+{chapter}%3A{verse}"
182
+ results.append(
183
+ f"Book: {book_name_english}\nChapter: {chapter}, Verse: {verse}\nPhrase: {words}\nTranslation: {translation}\n<a href='{link}' target='_blank'>[See on Bible Gateway]</a>\n\n")
184
+
185
+ conn.close()
186
+ return "\n".join(results)
187
 
188
  def run_app():
189
+ """Initializes and launches the Gradio app."""
190
+ initialize_database()
191
+ initialize_translator()
192
+
193
+ # Pre-populate the database
194
+ tanach_texts = process_json_files(1, 1) # Process only book 1 (start=1, end=1), not all books
195
+ populate_database(tanach_texts, max_phrase_length=4)
196
+ tanach_texts = process_json_files(27, 27) # Process only book 27 (start=27, end=27), not all books
197
+ populate_database(tanach_texts, max_phrase_length=4)
198
+
199
+ iface = gr.Interface(
200
+ fn=gematria_search_interface,
201
+ inputs=gr.Textbox(label="Enter phrase"),
202
+ outputs=gr.HTML(label="Results"),
203
+ title="Gematria Search in Tanach",
204
+ description="Search for phrases in the Tanach that have the same Gematria value.",
205
+ live=False,
206
+ allow_flagging="never"
207
+ )
208
+ iface.launch()
209
 
210
  if __name__ == "__main__":
211
+ run_app()
gematria.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fcf5c1564217654d09ee0f57dc0025114c5df51a352f829932e86a10570ce09
3
- size 47960064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4203619e032a9d31dd2232d2ac4960f26ce6af8fe04728bb0a53aac0ff05a5e7
3
+ size 7172096
util.py CHANGED
@@ -11,17 +11,14 @@ def process_json_files(start, end):
11
  with open(file_name, 'r', encoding='utf-8') as file:
12
  data = json.load(file)
13
  if data:
14
- results.append({
15
- "book": i,
16
- "title": data.get("title", "No title"),
17
- "text": data.get("text", "No text"),
18
- })
19
 
20
  except FileNotFoundError:
21
- results.append({"error": f"File {file_name} not found."})
22
  except json.JSONDecodeError as e:
23
- results.append({"error": f"File {file_name} could not be read as JSON: {e}"})
24
  except KeyError as e:
25
- results.append({"error": f"Expected key 'text' is missing in {file_name}: {e}"})
26
 
27
- return results
 
11
  with open(file_name, 'r', encoding='utf-8') as file:
12
  data = json.load(file)
13
  if data:
14
+ # Return a tuple of book_id and text data
15
+ results.append((i, {"title": data.get("title", "No title"), "text": data.get("text", [])}))
 
 
 
16
 
17
  except FileNotFoundError:
18
+ results.append((i, {"error": f"File {file_name} not found."})) # Use a tuple here
19
  except json.JSONDecodeError as e:
20
+ results.append((i, {"error": f"File {file_name} could not be read as JSON: {e}"})) # Use a tuple here
21
  except KeyError as e:
22
+ results.append((i, {"error": f"Expected key 'text' is missing in {file_name}: {e}"})) # Use a tuple here
23
 
24
+ return results