neuralworm committed on
Commit
2a65456
1 Parent(s): 40288c3

speed up search

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +106 -204
  3. temuraeh.py +0 -59
  4. utils.py +23 -10
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__
2
+ .idea
app.py CHANGED
@@ -1,34 +1,31 @@
1
- import logging
2
  import json
 
3
  import math
4
- import re
5
  from datetime import datetime, timedelta
6
 
 
7
  import pandas as pd
8
  from deep_translator import GoogleTranslator
9
  from gradio_calendar import Calendar
10
- import gradio as gr
11
- import torah
12
 
13
  from gematria import calculate_gematria, strip_diacritics
14
  from utils import (
15
- number_to_ordinal_word,
16
- custom_normalize,
17
  date_to_words,
18
  translate_date_to_words,
19
  process_json_files
20
  )
21
 
22
-
23
  # --- Constants ---
24
  FORBIDDEN_NAMES_FILE = "c.txt"
 
25
 
26
  logger = logging.getLogger(__name__)
27
  logging.basicConfig(level=logging.DEBUG)
28
 
 
29
  # --- Helper Functions ---
30
 
31
- def create_language_dropdown(label: str, default_value: str = 'en', show_label: bool = True) -> gr.Dropdown:
32
  """Creates a Gradio dropdown menu for language selection.
33
 
34
  Args:
@@ -39,7 +36,7 @@ def create_language_dropdown(label: str, default_value: str = 'en', show_label:
39
  Returns:
40
  gr.Dropdown: The Gradio dropdown component.
41
  """
42
- languages = GoogleTranslator(source='en', target='en').get_supported_languages(as_dict=True)
43
  return gr.Dropdown(
44
  choices=list(languages.keys()),
45
  label=label,
@@ -47,60 +44,31 @@ def create_language_dropdown(label: str, default_value: str = 'en', show_label:
47
  show_label=show_label
48
  )
49
 
50
- def calculate_gematria_sum(text: str, date_words: str) -> int:
51
- """Calculates the Gematria sum for a text and date words.
52
-
53
- Args:
54
- text (str): The text for Gematria calculation.
55
- date_words (str): The date in words for Gematria calculation.
56
 
57
- Returns:
58
- int: The Gematria sum.
59
- """
60
  combined_input = f"{text} {date_words}"
61
  logger.info(f"Combined input for Gematria: {combined_input}")
62
  sum_value = calculate_gematria(strip_diacritics(combined_input))
63
  logger.info(f"Gematria sum: {sum_value}")
64
  return sum_value
65
 
66
- def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
67
- strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
68
- search_word: str, date_words: str) -> list:
69
- """Performs the ELS search and filters by the Yiddish search word.
70
-
71
- Args:
72
- start (int): The starting book number.
73
- end (int): The ending book number.
74
- step (int): The step/jump width for ELS.
75
- rounds (int): The number of rounds through the books.
76
- length (int): The desired length of the results (0 for infinite).
77
- tlang (str): The target language for translation.
78
- strip_spaces (bool): Whether to strip spaces from book content.
79
- strip_in_braces (bool): Whether to strip text within braces from book content.
80
- strip_diacritics (bool): Whether to strip diacritics from book content.
81
- average_combine (bool): Whether to average-combine the results of combined rounds.
82
- search_word (str): The word to search for.
83
- date_words (str): The date in words.
84
 
85
- Returns:
86
- list: A list of filtered results, each containing the date, book result, and translated result.
87
- """
 
88
 
89
  logger.info("Starting ELS search...")
90
- logger.debug(f"Search word (original): {search_word}")
91
-
92
- # Translate the search word to Yiddish
93
- translator_yi = GoogleTranslator(source='auto', target='yi')
94
- search_word_yiddish = translator_yi.translate(search_word)
95
- logger.debug(f"Search word (Yiddish): {search_word_yiddish}")
96
 
97
  if step == 0 or rounds == 0:
98
  logger.info("Cannot search with step 0 or rounds 0")
99
- return [] # Return an empty list instead of None
100
 
101
  results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
102
- strip_in_braces, strip_diacritics, average_combine,
103
- translate_results=False) # Don't translate here
104
 
105
  # Filter results by search word in els_result_text (Yiddish)
106
  filtered_results = []
@@ -109,38 +77,18 @@ def perform_els_search(start: int, end: int, step: int, rounds: int, length: int
109
  if 'els_result_text' in result and search_word_yiddish in result['els_result_text']:
110
  filtered_results.append({
111
  'Date': date_words,
112
- 'Book Result': result['els_result_text'], # Use the original Yiddish text
113
- 'Result': result.get('translated_text', '') # Get translated text if available
114
  })
115
 
116
  return filtered_results
117
 
118
 
119
  def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
120
- strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
121
- search_phrase: str, results_df: pd.DataFrame, search_word: str,
122
- start_date: datetime, end_date: datetime) -> str:
123
- """Generates the JSON dump with configuration, date range, and results.
124
-
125
- Args:
126
- start (int): The starting book number.
127
- end (int): The ending book number.
128
- step (int): The step/jump width for ELS.
129
- rounds (int): The number of rounds through the books.
130
- length (int): The desired length of the results (0 for infinite).
131
- tlang (str): The target language for translation.
132
- strip_spaces (bool): Whether to strip spaces from book content.
133
- strip_in_braces (bool): Whether to strip text within braces from book content.
134
- strip_diacritics_chk (bool): Whether to strip diacritics from book content.
135
- search_phrase (str): The search phrase used.
136
- results_df (pd.DataFrame): The DataFrame containing the results.
137
- search_word (str): The word to search for.
138
- start_date (datetime): The start date of the search.
139
- end_date (datetime): The end date of the search.
140
-
141
- Returns:
142
- str: The JSON dump as a string.
143
- """
144
  config = {
145
  "Start Book": start,
146
  "End Book": end,
@@ -167,20 +115,8 @@ def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int
167
 
168
 
169
  def download_json_file(config_json: str, step: int, rounds: int,
170
- strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
171
- """Downloads the JSON config file with a descriptive name.
172
-
173
- Args:
174
- config_json (str): The JSON configuration data.
175
- step (int): The step/jump width for ELS.
176
- rounds (int): The number of rounds through the books.
177
- strip_spaces (bool): Whether spaces were stripped.
178
- strip_in_braces (bool): Whether text in braces was stripped.
179
- strip_diacritics_chk (bool): Whether diacritics were stripped.
180
-
181
- Returns:
182
- str: The path to the downloaded file.
183
- """
184
  filename_suffix = ""
185
  if strip_spaces:
186
  filename_suffix += "-stSp"
@@ -188,24 +124,17 @@ def download_json_file(config_json: str, step: int, rounds: int,
188
  filename_suffix += "-stBr"
189
  if strip_diacritics_chk:
190
  filename_suffix += "-stDc"
191
- file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json" # Include rounds in filename
192
  with open(file_path, "w", encoding='utf-8') as file:
193
  file.write(config_json)
194
  logger.info(f"Downloaded JSON file to: {file_path}")
195
  return file_path
196
 
 
197
  # --- Forbidden Names Functions ---
198
 
199
  def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
200
- """Loads forbidden names from the specified file.
201
-
202
- Args:
203
- filename (str, optional): The path to the file containing forbidden names.
204
- Defaults to FORBIDDEN_NAMES_FILE.
205
-
206
- Returns:
207
- list: A list of forbidden names.
208
- """
209
  try:
210
  with open(filename, "r", encoding='utf-8') as f:
211
  forbidden_names = [line.strip() for line in f]
@@ -216,16 +145,7 @@ def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
216
 
217
 
218
  def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
219
- """Checks if a name is similar to any forbidden name.
220
-
221
- Args:
222
- name (str): The name to check.
223
- forbidden_names (list): A list of forbidden names.
224
- threshold (int, optional): The similarity threshold (0-100). Defaults to 80.
225
-
226
- Returns:
227
- bool: True if the name is similar to a forbidden name, False otherwise.
228
- """
229
  from fuzzywuzzy import fuzz
230
  for forbidden_name in forbidden_names:
231
  similarity_ratio = fuzz.ratio(name.lower(), forbidden_name.lower())
@@ -234,13 +154,14 @@ def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80)
234
  return True
235
  return False
236
 
 
237
  # --- Gradio UI ---
238
 
239
  with gr.Blocks() as app:
240
  with gr.Row():
241
  start_date = Calendar(type="datetime", label="1. Select Start Date")
242
  end_date = Calendar(type="datetime", label="2. Select End Date")
243
- date_language_input = create_language_dropdown("3. Date Word Language (supported: all based on: latin, greek, arabic, hebrew)", default_value='english')
244
  search_word = gr.Textbox(label="4. Search Word")
245
 
246
  with gr.Row():
@@ -254,7 +175,7 @@ with gr.Blocks() as app:
254
  end = gr.Number(label="End Book", value=39)
255
  step = gr.Number(label="Jump Width (Steps) for ELS")
256
  rounds = gr.Number(label="Rounds through Books", value=1)
257
- float_step = gr.Number(visible=False, value=1) # For half/double step calculations
258
  half_step_btn = gr.Button("Steps / 2")
259
  double_step_btn = gr.Button("Steps * 2")
260
 
@@ -262,26 +183,25 @@ with gr.Blocks() as app:
262
  round_x = gr.Number(label="Round (x)", value=1)
263
  round_y = gr.Number(label="Round (y)", value=-1)
264
 
265
- average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds (hacky)", value=False)
266
  mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
267
 
268
  rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
269
 
270
-
271
  with gr.Row():
272
  length = gr.Number(label="Result Length (0=inf)", value=0)
273
- tlang = create_language_dropdown("Target Language for Translation", default_value='english')
274
  strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
275
  strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
276
  strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
277
  acknowledgment_chk = gr.Checkbox(
278
- label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.", # Add your full disclaimer here
279
  value=True
280
  )
281
 
282
  translate_btn = gr.Button("7. Search with ELS")
283
 
284
- results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results") # Changed to Dataframe
285
  json_output = gr.Textbox(label="JSON Configuration Output")
286
  json_download_btn = gr.Button("Prepare .json for Download")
287
  json_file = gr.File(label="Download Config JSON", file_count="single")
@@ -290,140 +210,121 @@ with gr.Blocks() as app:
290
 
291
  forbidden_names = load_forbidden_names()
292
 
 
293
  # --- Event Handlers ---
294
 
295
  def update_rounds_combination(round_x: int, round_y: int) -> str:
296
- """Updates the rounds_combination textbox based on round_x and round_y.
297
-
298
- Args:
299
- round_x (int): The value of round x.
300
- round_y (int): The value of round y.
301
-
302
- Returns:
303
- str: The combined rounds string.
304
- """
305
  return f"{int(round_x)},{int(round_y)}"
306
 
307
 
308
  def calculate_journal_sum(text: str, date_words: str) -> tuple:
309
- """Calculates the journal sum and updates the step value.
310
-
311
- Args:
312
- text (str): The input text for calculation.
313
- date_words (str): The date in words.
314
-
315
- Returns:
316
- tuple: A tuple containing the journal sum, step, and float_step.
317
- """
318
  if check_name_similarity(text, forbidden_names):
319
- return 0, 0, 0 # Return 0 if the name is forbidden
320
  if check_name_similarity(date_words, forbidden_names):
321
- return 0, 0, 0 # Return 0 if the name is forbidden
322
  sum_value = calculate_gematria_sum(text, date_words)
323
- return sum_value, sum_value, sum_value # Returning the same value three times
324
-
325
- def update_step_half(float_step: float) -> tuple:
326
- """Updates the step value to half.
327
 
328
- Args:
329
- float_step (float): The current float step value.
330
 
331
- Returns:
332
- tuple: A tuple containing the new step value and the new float step value.
333
- """
334
  new_step = math.ceil(float_step / 2)
335
  return new_step, float_step / 2
336
 
337
- def update_step_double(float_step: float) -> tuple:
338
- """Updates the step value to double.
339
-
340
- Args:
341
- float_step (float): The current float step value.
342
 
343
- Returns:
344
- tuple: A tuple containing the new step value and the new float step value.
345
- """
346
  new_step = math.ceil(float_step * 2)
347
  return new_step, float_step * 2
348
 
 
349
  # Update rounds_combination when round_x or round_y changes
350
  round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
351
  round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
352
 
353
 
354
  def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
355
- strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
356
- """Handles the download of the JSON config file.
357
-
358
- Args:
359
- config_json (str): The JSON configuration data.
360
- step (int): The step/jump width for ELS.
361
- rounds (int): The number of rounds through the books.
362
- strip_spaces (bool): Whether spaces were stripped.
363
- strip_in_braces (bool): Whether text in braces was stripped.
364
- strip_diacritics_chk (bool): Whether diacritics were stripped.
365
-
366
- Returns:
367
- str: The path to the downloaded file.
368
- """
369
  return download_json_file(config_json, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk)
370
 
 
371
  def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
372
- search_word: str, start: int, end: int, step: int, rounds: int, length: int,
373
- tlang: str, strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
374
- gematria_text: str, average_combine: bool) -> tuple:
375
- """Performs the ELS search for each date in the range, creates the JSON config, and displays the results.
376
-
377
- Args:
378
- start_date (datetime): The start date for the search.
379
- end_date (datetime): The end date for the search.
380
- date_language_input (str): The language for the date words.
381
- search_word (str): The word to search for.
382
- start (int): The starting book number.
383
- end (int): The ending book number.
384
- step (int): The step/jump width for ELS.
385
- rounds (int): The number of rounds through the books.
386
- length (int): The desired length of the results (0 for infinite).
387
- tlang (str): The target language for translation.
388
- strip_spaces (bool): Whether to strip spaces from book content.
389
- strip_in_braces (bool): Whether to strip text within braces from book content.
390
- strip_diacritics_chk (bool): Whether to strip diacritics from book content.
391
- gematria_text (str): The text for Gematria calculation.
392
- average_combine (bool): Whether to average-combine the results of combined rounds.
393
-
394
- Returns:
395
- tuple: A tuple containing the JSON configuration and the results DataFrame.
396
- """
397
  all_results = []
398
  delta = timedelta(days=1)
 
 
 
 
 
 
 
 
 
399
  while start_date <= end_date:
400
- date_words_output = translate_date_to_words(start_date, date_language_input)
401
- journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output) # Calculate the journal sum
402
- step = journal_sum # Set the step to the journal sum
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
  filtered_results = perform_els_search(start, end, step, rounds, length, tlang, strip_spaces,
405
- strip_in_braces, strip_diacritics_chk, average_combine, search_word,
 
406
  date_words_output)
407
- all_results.extend(filtered_results)
 
 
 
 
408
  start_date += delta
409
 
 
410
  if all_results:
411
  df = pd.DataFrame(all_results)
412
 
 
 
 
 
 
 
 
 
 
 
413
  # Translate the 'Book Result' column to the target language
414
- translator = GoogleTranslator(source='yi', target=tlang) # Translate from Yiddish
415
  df['Result'] = df['Book Result'].apply(translator.translate)
416
 
417
- config_json = generate_json_dump(start, end, step, rounds, length, tlang, strip_spaces,
418
- strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
419
- start_date, end_date)
420
  return config_json, df
421
  else:
422
- return "No results found.", None # Return None for the DataFrame when no results are found
 
423
 
424
  gematria_btn.click(
425
  calculate_journal_sum,
426
- inputs=[gematria_text, date_language_input], # Using date_language_input as a placeholder
427
  outputs=[gematria_result, step, float_step]
428
  )
429
 
@@ -441,7 +342,8 @@ with gr.Blocks() as app:
441
 
442
  translate_btn.click(
443
  perform_search_and_create_json,
444
- inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length, tlang, strip_spaces,
 
445
  strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
446
  outputs=[json_output, results_output]
447
  )
@@ -453,4 +355,4 @@ with gr.Blocks() as app:
453
  )
454
 
455
  if __name__ == "__main__":
456
- app.launch(share=False)
 
 
1
  import json
2
+ import logging
3
  import math
 
4
  from datetime import datetime, timedelta
5
 
6
+ import gradio as gr
7
  import pandas as pd
8
  from deep_translator import GoogleTranslator
9
  from gradio_calendar import Calendar
 
 
10
 
11
  from gematria import calculate_gematria, strip_diacritics
12
  from utils import (
 
 
13
  date_to_words,
14
  translate_date_to_words,
15
  process_json_files
16
  )
17
 
 
18
  # --- Constants ---
19
  FORBIDDEN_NAMES_FILE = "c.txt"
20
+ DEFAULT_LANGUAGE = 'english'
21
 
22
  logger = logging.getLogger(__name__)
23
  logging.basicConfig(level=logging.DEBUG)
24
 
25
+
26
  # --- Helper Functions ---
27
 
28
+ def create_language_dropdown(label: str, default_value: str = DEFAULT_LANGUAGE, show_label: bool = True) -> gr.Dropdown:
29
  """Creates a Gradio dropdown menu for language selection.
30
 
31
  Args:
 
36
  Returns:
37
  gr.Dropdown: The Gradio dropdown component.
38
  """
39
+ languages = GoogleTranslator().get_supported_languages(as_dict=True)
40
  return gr.Dropdown(
41
  choices=list(languages.keys()),
42
  label=label,
 
44
  show_label=show_label
45
  )
46
 
 
 
 
 
 
 
47
 
48
+ def calculate_gematria_sum(text: str, date_words: str) -> int:
49
+ """Calculates the Gematria sum for a text and date words."""
 
50
  combined_input = f"{text} {date_words}"
51
  logger.info(f"Combined input for Gematria: {combined_input}")
52
  sum_value = calculate_gematria(strip_diacritics(combined_input))
53
  logger.info(f"Gematria sum: {sum_value}")
54
  return sum_value
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ def perform_els_search(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
58
+ strip_spaces: bool, strip_in_braces: bool, strip_diacritics: bool, average_combine: bool,
59
+ search_word_yiddish: str, date_words: str) -> list: # Accept Yiddish word
60
+ """Performs the ELS search and filters by the Yiddish search word."""
61
 
62
  logger.info("Starting ELS search...")
63
+ logger.debug(f"Search word (Yiddish): {search_word_yiddish}") # No translation here
 
 
 
 
 
64
 
65
  if step == 0 or rounds == 0:
66
  logger.info("Cannot search with step 0 or rounds 0")
67
+ return []
68
 
69
  results = process_json_files(start, end, step, rounds, length, tlang, strip_spaces,
70
+ strip_in_braces, strip_diacritics, average_combine,
71
+ translate_results=False)
72
 
73
  # Filter results by search word in els_result_text (Yiddish)
74
  filtered_results = []
 
77
  if 'els_result_text' in result and search_word_yiddish in result['els_result_text']:
78
  filtered_results.append({
79
  'Date': date_words,
80
+ 'Book Result': result['els_result_text'],
81
+ 'Result': result.get('translated_text', '')
82
  })
83
 
84
  return filtered_results
85
 
86
 
87
  def generate_json_dump(start: int, end: int, step: int, rounds: int, length: int, tlang: str,
88
+ strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool,
89
+ search_phrase: str, results_df: pd.DataFrame, search_word: str,
90
+ start_date: datetime, end_date: datetime) -> str:
91
+ """Generates the JSON dump with configuration, date range, and results."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  config = {
93
  "Start Book": start,
94
  "End Book": end,
 
115
 
116
 
117
  def download_json_file(config_json: str, step: int, rounds: int,
118
+ strip_spaces: bool, strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
119
+ """Downloads the JSON config file with a descriptive name."""
 
 
 
 
 
 
 
 
 
 
 
 
120
  filename_suffix = ""
121
  if strip_spaces:
122
  filename_suffix += "-stSp"
 
124
  filename_suffix += "-stBr"
125
  if strip_diacritics_chk:
126
  filename_suffix += "-stDc"
127
+ file_path = f"step-{step}-rounds-{rounds}{filename_suffix}.json"
128
  with open(file_path, "w", encoding='utf-8') as file:
129
  file.write(config_json)
130
  logger.info(f"Downloaded JSON file to: {file_path}")
131
  return file_path
132
 
133
+
134
  # --- Forbidden Names Functions ---
135
 
136
  def load_forbidden_names(filename: str = FORBIDDEN_NAMES_FILE) -> list:
137
+ """Loads forbidden names from the specified file."""
 
 
 
 
 
 
 
 
138
  try:
139
  with open(filename, "r", encoding='utf-8') as f:
140
  forbidden_names = [line.strip() for line in f]
 
145
 
146
 
147
  def check_name_similarity(name: str, forbidden_names: list, threshold: int = 80) -> bool:
148
+ """Checks if a name is similar to any forbidden name."""
 
 
 
 
 
 
 
 
 
149
  from fuzzywuzzy import fuzz
150
  for forbidden_name in forbidden_names:
151
  similarity_ratio = fuzz.ratio(name.lower(), forbidden_name.lower())
 
154
  return True
155
  return False
156
 
157
+
158
  # --- Gradio UI ---
159
 
160
  with gr.Blocks() as app:
161
  with gr.Row():
162
  start_date = Calendar(type="datetime", label="1. Select Start Date")
163
  end_date = Calendar(type="datetime", label="2. Select End Date")
164
+ date_language_input = create_language_dropdown("3. Date Word Language", default_value=DEFAULT_LANGUAGE)
165
  search_word = gr.Textbox(label="4. Search Word")
166
 
167
  with gr.Row():
 
175
  end = gr.Number(label="End Book", value=39)
176
  step = gr.Number(label="Jump Width (Steps) for ELS")
177
  rounds = gr.Number(label="Rounds through Books", value=1)
178
+ float_step = gr.Number(visible=False, value=1)
179
  half_step_btn = gr.Button("Steps / 2")
180
  double_step_btn = gr.Button("Steps * 2")
181
 
 
183
  round_x = gr.Number(label="Round (x)", value=1)
184
  round_y = gr.Number(label="Round (y)", value=-1)
185
 
186
+ average_combine_chk = gr.Checkbox(label="Average-Combine Combined Rounds", value=False)
187
  mirror_book_numbers = gr.Checkbox(label="Mirror book numbers for negative rounds (axis=book 20)", value=False)
188
 
189
  rounds_combination = gr.Textbox(label="Combined Rounds", value="1,-1")
190
 
 
191
  with gr.Row():
192
  length = gr.Number(label="Result Length (0=inf)", value=0)
193
+ tlang = create_language_dropdown("Target Language for Translation", default_value=DEFAULT_LANGUAGE)
194
  strip_spaces = gr.Checkbox(label="Strip Spaces from Books", value=True)
195
  strip_in_braces = gr.Checkbox(label="Strip Text in Braces from Books", value=True)
196
  strip_diacritics_chk = gr.Checkbox(label="Strip Diacritics from Books", value=True)
197
  acknowledgment_chk = gr.Checkbox(
198
+ label="The User hereby accepts that the User will not harm or stalk anyone with this information, or bet on any of this information, in any regards.",
199
  value=True
200
  )
201
 
202
  translate_btn = gr.Button("7. Search with ELS")
203
 
204
+ results_output = gr.Dataframe(headers=['Date', 'Book Result', 'Result'], label="Results")
205
  json_output = gr.Textbox(label="JSON Configuration Output")
206
  json_download_btn = gr.Button("Prepare .json for Download")
207
  json_file = gr.File(label="Download Config JSON", file_count="single")
 
210
 
211
  forbidden_names = load_forbidden_names()
212
 
213
+
214
  # --- Event Handlers ---
215
 
216
  def update_rounds_combination(round_x: int, round_y: int) -> str:
217
+ """Updates the rounds_combination textbox based on round_x and round_y."""
 
 
 
 
 
 
 
 
218
  return f"{int(round_x)},{int(round_y)}"
219
 
220
 
221
  def calculate_journal_sum(text: str, date_words: str) -> tuple:
222
+ """Calculates the journal sum and updates the step value."""
 
 
 
 
 
 
 
 
223
  if check_name_similarity(text, forbidden_names):
224
+ return 0, 0, 0
225
  if check_name_similarity(date_words, forbidden_names):
226
+ return 0, 0, 0
227
  sum_value = calculate_gematria_sum(text, date_words)
228
+ return sum_value, sum_value, sum_value
 
 
 
229
 
 
 
230
 
231
+ def update_step_half(float_step: float) -> tuple:
232
+ """Updates the step value to half."""
 
233
  new_step = math.ceil(float_step / 2)
234
  return new_step, float_step / 2
235
 
 
 
 
 
 
236
 
237
+ def update_step_double(float_step: float) -> tuple:
238
+ """Updates the step value to double."""
 
239
  new_step = math.ceil(float_step * 2)
240
  return new_step, float_step * 2
241
 
242
+
243
  # Update rounds_combination when round_x or round_y changes
244
  round_x.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
245
  round_y.change(update_rounds_combination, inputs=[round_x, round_y], outputs=rounds_combination)
246
 
247
 
248
  def handle_json_download(config_json: str, step: int, rounds: int, strip_spaces: bool,
249
+ strip_in_braces: bool, strip_diacritics_chk: bool) -> str:
250
+ """Handles the download of the JSON config file."""
 
 
 
 
 
 
 
 
 
 
 
 
251
  return download_json_file(config_json, step, rounds, strip_spaces, strip_in_braces, strip_diacritics_chk)
252
 
253
+
254
  def perform_search_and_create_json(start_date: datetime, end_date: datetime, date_language_input: str,
255
+ search_word: str, start: int, end: int, step: int, rounds: int, length: int,
256
+ tlang: str, strip_spaces: bool, strip_in_braces: bool,
257
+ strip_diacritics_chk: bool,
258
+ gematria_text: str, average_combine: bool) -> tuple:
259
+ """Performs the ELS search for each date in the range, creates the JSON config, and displays the results."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  all_results = []
261
  delta = timedelta(days=1)
262
+ original_start_date = start_date
263
+ total_steps = 0
264
+
265
+ # Translate the search word to Yiddish ONLY ONCE (outside the loop)
266
+ translator_yi = GoogleTranslator(source='auto', target='yi')
267
+ search_word_yiddish = translator_yi.translate(search_word)
268
+
269
+ seen_dates = set() # Keep track of processed dates
270
+
271
  while start_date <= end_date:
272
+ date_words_output = date_to_words(start_date.strftime("%Y-%m-%d"))
273
+
274
+ # Only translate if the date language is not English
275
+ if date_language_input.lower() != DEFAULT_LANGUAGE:
276
+ date_words_output = translate_date_to_words(start_date, date_language_input)
277
+
278
+ # Skip if date has already been processed
279
+ if date_words_output in seen_dates:
280
+ start_date += delta
281
+ continue
282
+ seen_dates.add(date_words_output)
283
+
284
+ journal_sum, _, _ = calculate_journal_sum(gematria_text, date_words_output)
285
+ step = journal_sum
286
+ total_steps += step
287
 
288
  filtered_results = perform_els_search(start, end, step, rounds, length, tlang, strip_spaces,
289
+ strip_in_braces, strip_diacritics_chk, average_combine,
290
+ search_word_yiddish, # Pass the translated Yiddish word
291
  date_words_output)
292
+
293
+ # Only add the first result for each date
294
+ if filtered_results:
295
+ all_results.append(filtered_results[0])
296
+
297
  start_date += delta
298
 
299
+ # Process results after the loop completes
300
  if all_results:
301
  df = pd.DataFrame(all_results)
302
 
303
+ # Deduplicate steps
304
+ seen_steps = set()
305
+ deduplicated_results = []
306
+ for result in all_results:
307
+ step_key = (result['Date'], result['Book Result'])
308
+ if step_key not in seen_steps:
309
+ deduplicated_results.append(result)
310
+ seen_steps.add(step_key)
311
+ df = pd.DataFrame(deduplicated_results)
312
+
313
  # Translate the 'Book Result' column to the target language
314
+ translator = GoogleTranslator(source='yi', target=tlang)
315
  df['Result'] = df['Book Result'].apply(translator.translate)
316
 
317
+ config_json = generate_json_dump(start, end, total_steps, rounds, length, tlang, strip_spaces,
318
+ strip_in_braces, strip_diacritics_chk, gematria_text, df, search_word,
319
+ original_start_date, end_date)
320
  return config_json, df
321
  else:
322
+ return "No results found.", None
323
+
324
 
325
  gematria_btn.click(
326
  calculate_journal_sum,
327
+ inputs=[gematria_text, date_language_input],
328
  outputs=[gematria_result, step, float_step]
329
  )
330
 
 
342
 
343
  translate_btn.click(
344
  perform_search_and_create_json,
345
+ inputs=[start_date, end_date, date_language_input, search_word, start, end, step, rounds_combination, length,
346
+ tlang, strip_spaces,
347
  strip_in_braces, strip_diacritics_chk, gematria_text, average_combine_chk],
348
  outputs=[json_output, results_output]
349
  )
 
355
  )
356
 
357
  if __name__ == "__main__":
358
+ app.launch(share=False)
temuraeh.py DELETED
@@ -1,59 +0,0 @@
1
- import json
2
- # Implementemos la función de temurah con el alfabeto completo y probemos la conversión de "Baphomet" a "Sofia"
3
- # en hebreo usando temurah.
4
- # Nota: La representación exacta de "Baphomet" y "Sofia" en hebreo puede variar debido a interpretaciones,
5
- # pero aquí usaremos transliteraciones aproximadas para ilustrar cómo podría hacerse.
6
-
7
- def temurah(text, hebrew_alphabet='אבגדהוזחטיכלמנסעפצקרשת', reverse=False):
8
- """
9
- Aplica la temurah a un texto hebreo utilizando todo el alfabeto hebreo.
10
- El esquema de ejemplo simplemente invierte el orden del alfabeto.
11
- """
12
- # Invertir el alfabeto si se solicita
13
- if reverse:
14
- hebrew_alphabet = hebrew_alphabet[::-1]
15
-
16
- # Generar el alfabeto invertido
17
- inverted_alphabet = hebrew_alphabet[::-1]
18
-
19
- # Crear el diccionario de mapeo para temurah
20
- temurah_mapping = {orig: inv for orig, inv in zip(hebrew_alphabet, inverted_alphabet)}
21
-
22
- # Aplicar temurah al texto
23
- temurah_text = ''.join(temurah_mapping.get(char, char) for char in text)
24
-
25
- return temurah_text
26
-
27
- # Definir el alfabeto hebreo
28
- hebrew_alphabet = 'אבגדהוזחטיכלמנסעפצקרשת'
29
-
30
- # Texto de ejemplo: "Baphomet" y "Sofia" en hebreo
31
- # Es importante notar que la transliteración directa de nombres propios o términos específicos entre idiomas
32
- # puede no ser directa o puede requerir ajustes basados en la fonética o el uso histórico.
33
-
34
- # Por simplificación, supongamos transliteraciones hipotéticas para "Baphomet" a "Sofia":
35
- # Estas transliteraciones son ejemplos y pueden no reflejar transliteraciones precisas.
36
- baphomet_hebrew = 'בפומת' # Esta es una transliteración hipotética para "Baphomet"
37
- sofia_hebrew = 'סופיא' # Esta es una transliteración hipotética para "Sofia"
38
-
39
-
40
- jesus ="ישוע"
41
- christ = ""
42
-
43
- print(temurah(jesus,hebrew_alphabet))
44
- # Aplicar temurah al texto hipotético de "Baphomet"
45
- temurah_baphomet = temurah(baphomet_hebrew, hebrew_alphabet)
46
-
47
- # Mostrar resultados
48
-
49
- print(temurah_baphomet+"\n"+sofia_hebrew)
50
-
51
-
52
-
53
-
54
- genesis = json.loads(open("genesis.json","r").read())["text"][0]
55
-
56
- ##example_text = "בראשית ברא אלהים את השמים ואת הארץ" # "En el principio Dios creó los cielos y la tierra."
57
- #for txt in genesis:
58
- # print(temurah(txt,hebrew_alphabet))
59
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py CHANGED
@@ -8,8 +8,20 @@ import inflect
8
  from datetime import datetime
9
  from deep_translator import GoogleTranslator
10
 
11
- def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True,
12
- strip_in_braces=True, strip_diacritics=True, average_compile=False, translate_results=False):
 
 
 
 
 
 
 
 
 
 
 
 
13
  """Processes JSON files to extract and process text.
14
 
15
  Args:
@@ -23,7 +35,7 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
23
  strip_in_braces (bool, optional): Whether to strip text in braces. Defaults to True.
24
  strip_diacritics (bool, optional): Whether to strip diacritics. Defaults to True.
25
  average_compile (bool, optional): Whether to average-combine results. Defaults to False.
26
- translate_results (bool, optional): Whether to translate the results. Defaults to False.
27
 
28
  Returns:
29
  list: A list of processed results.
@@ -57,19 +69,19 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
57
  clean_text = clean_text.replace(" ", " ")
58
 
59
  text_length = len(clean_text)
60
-
61
  selected_characters_per_round = {}
62
  for round_num in map(int, rounds.split(',')):
63
  # Handle cases where no characters should be selected
64
  if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
65
  # Corrected logic for negative rounds and step = 1
66
  if round_num > 0:
67
- current_position = step - 1
68
  else:
69
  current_position = text_length - 1 if step == 1 else text_length - step
70
 
71
  completed_rounds = 0
72
- selected_characters = ""
73
 
74
  while completed_rounds < abs(round_num):
75
  selected_characters += clean_text[current_position % text_length]
@@ -78,16 +90,17 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
78
  current_position += step if round_num > 0 else -step
79
 
80
  if (round_num > 0 and current_position >= text_length * (completed_rounds + 1)) or \
81
- (round_num < 0 and current_position < 0):
82
  completed_rounds += 1
83
 
84
  selected_characters_per_round[round_num] = selected_characters
85
-
86
  if average_compile and len(selected_characters_per_round) > 1:
87
  result_text = ""
88
  keys = sorted(selected_characters_per_round.keys())
89
  for i in range(len(keys) - 1):
90
- result_text = average_gematria(selected_characters_per_round[keys[i]], selected_characters_per_round[keys[i+1]])
 
91
  else:
92
  result_text = ''.join(selected_characters_per_round.values())
93
 
@@ -103,7 +116,7 @@ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip
103
  "title": data["title"],
104
  "els_result_text": result_text,
105
  "els_result_gematria": calculate_gematria(result_text),
106
- "translated_text": translated_text
107
  })
108
 
109
  except FileNotFoundError:
 
8
  from datetime import datetime
9
  from deep_translator import GoogleTranslator
10
 
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ import json
16
+ import re
17
+ from gematria import calculate_gematria
18
+ import inflect
19
+ from datetime import datetime
20
+ from deep_translator import GoogleTranslator
21
+
22
+
23
+ def process_json_files(start, end, step, rounds="1", length=0, tlang="en", strip_spaces=True,
24
+ strip_in_braces=True, strip_diacritics=True, average_compile=False, translate_results=False):
25
  """Processes JSON files to extract and process text.
26
 
27
  Args:
 
35
  strip_in_braces (bool, optional): Whether to strip text in braces. Defaults to True.
36
  strip_diacritics (bool, optional): Whether to strip diacritics. Defaults to True.
37
  average_compile (bool, optional): Whether to average-combine results. Defaults to False.
38
+ translate_results (bool, optional): Whether to translate the results. Defaults to False.
39
 
40
  Returns:
41
  list: A list of processed results.
 
69
  clean_text = clean_text.replace(" ", " ")
70
 
71
  text_length = len(clean_text)
72
+
73
  selected_characters_per_round = {}
74
  for round_num in map(int, rounds.split(',')):
75
  # Handle cases where no characters should be selected
76
  if not (round_num == 1 and step > text_length) and not (round_num == -1 and step > text_length):
77
  # Corrected logic for negative rounds and step = 1
78
  if round_num > 0:
79
+ current_position = step - 1
80
  else:
81
  current_position = text_length - 1 if step == 1 else text_length - step
82
 
83
  completed_rounds = 0
84
+ selected_characters = ""
85
 
86
  while completed_rounds < abs(round_num):
87
  selected_characters += clean_text[current_position % text_length]
 
90
  current_position += step if round_num > 0 else -step
91
 
92
  if (round_num > 0 and current_position >= text_length * (completed_rounds + 1)) or \
93
+ (round_num < 0 and current_position < 0):
94
  completed_rounds += 1
95
 
96
  selected_characters_per_round[round_num] = selected_characters
97
+
98
  if average_compile and len(selected_characters_per_round) > 1:
99
  result_text = ""
100
  keys = sorted(selected_characters_per_round.keys())
101
  for i in range(len(keys) - 1):
102
+ result_text = average_gematria(selected_characters_per_round[keys[i]],
103
+ selected_characters_per_round[keys[i + 1]])
104
  else:
105
  result_text = ''.join(selected_characters_per_round.values())
106
 
 
116
  "title": data["title"],
117
  "els_result_text": result_text,
118
  "els_result_gematria": calculate_gematria(result_text),
119
+ "translated_text": translated_text
120
  })
121
 
122
  except FileNotFoundError: