import logging

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

import gradio as gr
import torah
import bible
import quran
from utils import number_to_ordinal_word, custom_normalize, date_to_words, translate_date_to_words
from gematria import calculate_gematria, strip_diacritics

import pandas as pd
from deep_translator import GoogleTranslator
from gradio_calendar import Calendar
from datetime import datetime, timedelta
import math
import json
import re
import sqlite3
from collections import defaultdict
from typing import List, Tuple

# --- Constants ---
DATABASE_FILE = 'gematria.db'
MAX_PHRASE_LENGTH_LIMIT = 20


# --- Database Initialization ---
def initialize_database():
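    """Open the module-level SQLite connection and create the results table,
    its gematria index, and the processed_books table if they do not exist."""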
    global conn
    conn = sqlite3.connect(DATABASE_FILE)
    cursor = conn.cursor()
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS results (
        gematria_sum INTEGER,
        words TEXT,
        translation TEXT,
        book TEXT,
        chapter INTEGER,
        verse INTEGER,
        phrase_length INTEGER,
        word_position TEXT, 
        PRIMARY KEY (gematria_sum, words, book, chapter, verse, word_position)
    )
    ''')
    cursor.execute('''
    CREATE INDEX IF NOT EXISTS idx_results_gematria
    ON results (gematria_sum)
    ''')
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS processed_books (
        book TEXT PRIMARY KEY,
        max_phrase_length INTEGER
    )
    ''')
    conn.commit()


# --- Initialize Database ---
initialize_database()


# --- Helper Functions (from Network app.py) ---
def flatten_text(text: List) -> str:
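    """Recursively join nested lists of strings into one space-separated string; non-list input is returned unchanged."""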
    if isinstance(text, list):
        return " ".join(flatten_text(item) if isinstance(item, list) else item for item in text)
    return text


def search_gematria_in_db(gematria_sum: int, max_words: int) -> List[Tuple[str, str, int, int, int, str]]:
    """Return all phrases from the results table with the given gematria sum and at most max_words words."""
    # Use a short-lived local connection instead of rebinding the module-level one,
    # so calls from Gradio worker threads never share or leak a connection.
    db = sqlite3.connect(DATABASE_FILE)
    try:
        cursor = db.cursor()
        cursor.execute('''
        SELECT words, book, chapter, verse, phrase_length, word_position
        FROM results
        WHERE gematria_sum = ? AND phrase_length <= ?
        ''', (gematria_sum, max_words))
        results = cursor.fetchall()
    finally:
        db.close()
    return results


def get_most_frequent_phrase(results):
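    """Return the phrase that occurs most often in the given database rows, or None if there are none."""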
    phrase_counts = defaultdict(int)
    for words, book, chapter, verse, phrase_length, word_position in results:
        phrase_counts[words] += 1
    most_frequent_phrase = max(phrase_counts, key=phrase_counts.get) if phrase_counts else None  # Handle empty results
    return most_frequent_phrase


# --- Functions from BOS app.py ---
def create_language_dropdown(label, default_value='en', show_label=True):
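    """Build a Gradio dropdown listing the languages supported by GoogleTranslator."""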
    languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
    return gr.Dropdown(
        choices=list(languages.keys()),
        label=label,
        value=default_value,
        show_label=show_label
    )


def calculate_gematria_sum(text, date_words):
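    """Sum any literal digits in the combined text and date words, then add the gematria of the remaining diacritic-stripped text."""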
    if text or date_words:
        combined_input = f"{text} {date_words}"
        numbers = re.findall(r'\d+', combined_input)
        text_without_numbers = re.sub(r'\d+', '', combined_input)
        number_sum = sum(int(number) for number in numbers)
        text_gematria = calculate_gematria(strip_diacritics(text_without_numbers))
        total_sum = text_gematria + number_sum
        return total_sum
    else:
        return None


def perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       merge_results, include_torah, include_bible, include_quran):
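    """Run the ELS search over the selected corpora (Torah, Bible, Quran); interleave the
    per-corpus results when merge_results is set, otherwise concatenate them."""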
    if step == 0 or rounds_combination == "0,0":
        return None

    torah_results = []
    bible_results = []
    quran_results = []

    if include_torah:
        torah_results.extend(
            torah.process_json_files(1, 39, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces,
                                     strip_diacritics_chk))

    if include_bible:
        bible_results.extend(
            bible.process_json_files(40, 66, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces,
                                     strip_diacritics_chk))

    if include_quran:
        quran_results.extend(
            quran.process_json_files(1, 114, step, rounds_combination, 0, tlang, strip_spaces, strip_in_braces,
                                     strip_diacritics_chk))

    if merge_results:
        results = []
        max_length = max(len(torah_results), len(bible_results), len(quran_results))
        for i in range(max_length):
            if i < len(torah_results):
                results.append(torah_results[i])
            if i < len(bible_results):
                results.append(bible_results[i])
            if i < len(quran_results):
                results.append(quran_results[i])
    else:
        results = torah_results + bible_results + quran_results

    return results


def generate_json_dump(start, end, step, rounds_combination, tlang, strip_spaces, strip_in_braces, strip_diacritics_chk,
                       search_phrase, results_df, start_date, end_date):
    """Generates the JSON dump with configuration, date range, and results."""
    config = {
        "Start Book": start,
        "End Book": end,
        "Step": step,
        "Rounds": rounds_combination,
        "Target Language": tlang,
        "Strip Spaces": strip_spaces,
        "Strip Text in Braces": strip_in_braces,
        "Strip Diacritics": strip_diacritics_chk,
        "Search Phrase": search_phrase
    }
    result = {
        "Configuration": config,
        "DateRange": {
            "StartDate": start_date.strftime("%Y-%m-%d"),
            "EndDate": end_date.strftime("%Y-%m-%d")
        },
        "Results": json.loads(results_df.to_json(orient='records', force_ascii=False))
    }
    logger.info(f"Generated JSON dump: {result}")
    return json.dumps(result, indent=4, ensure_ascii=False)


def download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk):
    """Downloads the JSON config file with a descriptive name."""
    filename_suffix = ""
    if strip_spaces:
        filename_suffix += "-stSp"
    if strip_in_braces:
        filename_suffix += "-stBr"
    if strip_diacritics_chk:
        filename_suffix += "-stDc"
    file_path = f"step-{step}-rounds-{rounds_combination}{filename_suffix}.json"  # Include rounds in filename
    with open(file_path, "w", encoding='utf-8') as file:
        file.write(config_json)
    logger.info(f"Downloaded JSON file to: {file_path}")
    return file_path


# --- Main Gradio App ---
with gr.Blocks() as app:
    with gr.Row():
        start_date = Calendar(type="datetime", label="Start Date")
        end_date = Calendar(type="datetime", label="End Date")

    with gr.Row():
        tlang = create_language_dropdown("Target Language for Translation", default_value='english')
        date_language_input = create_language_dropdown("Language of the person/topic (optional) (Date Word Language)",
                                                       default_value='english')

    with gr.Row():
        gematria_text = gr.Textbox(label="Name and/or Topic (required)", value="Hans Albert Einstein")
        gematria_result = gr.Number(label="Journal Sum")

    with gr.Row():
        step = gr.Number(label="Jump Width (Steps) for ELS")
        float_step = gr.Number(visible=False, value=1)
        half_step_btn = gr.Button("Steps / 2")
        double_step_btn = gr.Button("Steps * 2")

        with gr.Column():
            round_x = gr.Number(label="Round (1)", value=1)
            round_y = gr.Number(label="Round (2)", value=-1)

        rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")

    with gr.Row():
        include_torah_chk = gr.Checkbox(label="Include Torah", value=True)
        include_bible_chk = gr.Checkbox(label="Include Bible", value=True)
        include_quran_chk = gr.Checkbox(label="Include Quran", value=True)
        merge_results_chk = gr.Checkbox(label="Merge Results (Torah-Bible-Quran)", value=True)

        strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
        strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
        strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)

    translate_btn = gr.Button("Search with ELS")

    # --- Output Components ---
    markdown_output = gr.Dataframe(label="ELS Results")
    most_frequent_phrase_output = gr.Textbox(label="Most Frequent Phrase in Network Search")
    json_output = gr.Textbox(label="JSON Configuration Output")
    json_download_btn = gr.Button("Prepare .json for Download")
    json_file = gr.File(label="Download Config JSON", file_count="single")


    # --- Event Handlers ---
    def update_journal_sum(gematria_text, date_words_output):
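        """Recalculate the journal sum from the name/topic and date words; the same value is returned three times."""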
        sum_value = calculate_gematria_sum(gematria_text, date_words_output)
        return sum_value, sum_value, sum_value


    def update_rounds_combination(round_x, round_y):
        return f"{int(round_x)},{int(round_y)}"


    def update_step_half(float_step):
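        """Halve the hidden float step and return its ceiling as the visible integer step."""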
        new_step = math.ceil(float_step / 2)
        return new_step, float_step / 2


    def update_step_double(float_step):
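        """Double the hidden float step and return its ceiling as the visible integer step."""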
        new_step = math.ceil(float_step * 2)
        return new_step, float_step * 2


    def perform_search(start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces,
                       strip_in_braces, strip_diacritics_chk, merge_results, include_torah, include_bible,
                       include_quran, gematria_text):
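        """Walk the date range one day at a time, run an ELS search per date, attach the most
        frequent matching phrase from the gematria database to each hit, and return the results
        dataframe, the overall most frequent phrase, and the JSON configuration dump."""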
        all_results = []
        delta = timedelta(days=1)
        current_date = start_date

        while current_date <= end_date:
            date_words_output = translate_date_to_words(current_date, date_language_input)
            journal_sum, _, _ = update_journal_sum(gematria_text, date_words_output)
            step = journal_sum  # The daily journal sum becomes the ELS jump width for this date

            els_results = perform_els_search(step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                             strip_diacritics_chk, merge_results, include_torah, include_bible,
                                             include_quran) or []  # perform_els_search returns None when step is 0

            # --- Network Search Integration ---
            updated_els_results = []
            for result in els_results:
                try:
                    gematria_sum = calculate_gematria(result['result_text'])
                except KeyError as e:
                    print(f"DEBUG: KeyError - Key '{e.args[0]}' not found in result. Skipping this result.")
                    continue

                max_words = len(result['result_text'].split())
                matching_phrases = search_gematria_in_db(gematria_sum, max_words)

                # Iteratively increase max_words if no results are found
                max_words_limit = 20  # Set a limit for max_words
                while not matching_phrases and max_words < max_words_limit:
                    max_words += 1
                    matching_phrases = search_gematria_in_db(gematria_sum, max_words)

                # Find most frequent phrase or first phrase with lowest word count
                if matching_phrases:
                    most_frequent_phrase = get_most_frequent_phrase(matching_phrases)
                else:
                    # Sort initial results by word count and take the first phrase
                    sorted_results = sorted(search_gematria_in_db(gematria_sum, max_words_limit),
                                            key=lambda x: len(x[0].split()))
                    most_frequent_phrase = sorted_results[0][0] if sorted_results else ""

                # Add most frequent phrase, date, and date_words to the result dictionary
                result['Most Frequent Phrase'] = most_frequent_phrase
                result['Date'] = current_date.strftime('%Y-%m-%d')
                result['Date Words'] = date_words_output
                updated_els_results.append(result)

            all_results.extend(updated_els_results)
            current_date += delta

        # --- Prepare Dataframe ---
        df = pd.DataFrame(all_results)
        df.index = range(1, len(df) + 1)
        df.reset_index(inplace=True)
        df.rename(columns={'index': 'Result Number'}, inplace=True)

        # Find the most frequent phrase across all dates
        all_phrases = [result['Most Frequent Phrase'] for result in all_results]
        most_frequent_phrase = max(set(all_phrases), key=all_phrases.count) if all_phrases else ""

        # Generate JSON output
        search_phrase = f"{gematria_text}"  # Removed date_words_output as it's now included in each result
        config_json = generate_json_dump(1, 180, step, rounds_combination, tlang, strip_spaces, strip_in_braces,
                                         strip_diacritics_chk, search_phrase, df, start_date, end_date)

        return df, most_frequent_phrase, config_json


    def handle_json_download(config_json, step, rounds_combination, strip_spaces, strip_in_braces,
                             strip_diacritics_chk):
        """Handles the download of the JSON config file."""
        return download_json_file(config_json, step, rounds_combination, strip_spaces, strip_in_braces,
                                  strip_diacritics_chk)


    # --- Event Triggers ---
    round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
    round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)

    half_step_btn.click(update_step_half, inputs=[float_step], outputs=[step, float_step])
    double_step_btn.click(update_step_double, inputs=[float_step], outputs=[step, float_step])

    translate_btn.click(
        perform_search,
        inputs=[start_date, end_date, date_language_input, step, rounds_combination, tlang, strip_spaces,
                strip_in_braces, strip_diacritics_chk, merge_results_chk, include_torah_chk, include_bible_chk,
                include_quran_chk, gematria_text],
        outputs=[markdown_output, most_frequent_phrase_output, json_output]
    )

    json_download_btn.click(
        handle_json_download,
        inputs=[json_output, step, rounds_combination, strip_spaces, strip_in_braces, strip_diacritics_chk],
        outputs=[json_file]
    )

if __name__ == "__main__":
    app.launch(share=False)