bartman081523 committed on
Commit
d34824e
·
1 Parent(s): 420de2c

translation cache for app version

Browse files
Files changed (2) hide show
  1. app.py +109 -78
  2. translation_cache.db +3 -0
app.py CHANGED
@@ -5,13 +5,12 @@ import time
5
  import requests
6
  import pytz
7
  import unittest
 
8
 
9
  import gradio as gr
10
- from utils import process_json_files, flatten_text_with_line_breaks, calculate_tanach_statistics, build_word_index
11
-
12
- import logging
13
  from deep_translator import GoogleTranslator
14
  from deep_translator.exceptions import NotValidLength, RequestError
 
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -20,6 +19,26 @@ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %
20
  TANACH_DATA = process_json_files(1, 39)
21
  WORD_INDEX = build_word_index(TANACH_DATA)
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # --- Utility Functions ---
24
 
25
  def get_current_word_data(client_time_str):
@@ -43,46 +62,48 @@ def get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=True):
43
  chapter_text = TANACH_DATA[book_id]["text"][chapter_id]
44
  flattened_chapter = flatten_text_with_line_breaks(chapter_text)
45
 
46
- # Highlight the word *before* joining with <br>
47
- if highlight_word and 0 <= verse_id - 1 < len(flattened_chapter):
48
- flattened_chapter[verse_id - 1] = \
49
- f"<span class='highlight'>{flattened_chapter[verse_id - 1]}</span>"
50
-
51
  return '<br>'.join(flattened_chapter)
52
 
 
 
53
 
54
- def translate_verse(hebrew_verse, highlight_word=True):
55
- """Translates a Hebrew verse to English, splitting into chunks if necessary."""
56
- try:
57
- translator = GoogleTranslator(source='iw', target='en')
58
- max_length = 2000 # Slightly below the limit to be safe
59
- translated_text = ""
60
 
61
- # Split the verse into chunks smaller than the max length
62
- chunks = [hebrew_verse[i:i + max_length] for i in range(0, len(hebrew_verse), max_length)]
 
 
 
 
63
 
64
- for chunk_index, chunk in enumerate(chunks):
65
- # Translate the current chunk
66
- translated_chunk = translator.translate(chunk)
67
 
68
- # If it's not the first chunk, find the last line break and start from there
69
- if chunk_index > 0:
70
- last_line_break = translated_chunk.rfind('<br>', 0, 100) # Find last <br> in first 100 chars
71
- if last_line_break != -1:
72
- translated_text += translated_chunk[last_line_break + 4:] # Add from after <br>
73
- else:
74
- translated_text += translated_chunk
75
- else:
76
- translated_text += translated_chunk
77
 
78
- return translated_text
 
79
 
80
- except RequestError as e:
81
- logging.warning(f"Translation failed: Request Error - {e}")
82
- return "Translation unavailable: Request Error"
83
 
 
 
 
 
 
 
84
 
85
- # --- Gradio Interface ---
 
 
 
 
86
 
87
  def update_tanach_display(client_time_str, timezone):
88
  """Updates the Gradio interface with client time, verse info, and translations."""
@@ -118,56 +139,66 @@ def update_tanach_display(client_time_str, timezone):
118
  """
119
 
120
  # Get and format Hebrew and English verses
121
- hebrew_verse = get_formatted_verse(book_id, chapter_id, verse_id)
122
- english_verse = translate_verse('\n'.join(hebrew_verse.split('<br>')), highlight_word=False)
123
 
124
- return verse_info, hebrew_verse, english_verse
 
 
125
 
 
 
 
126
 
127
- # --- Gradio Interface ---
 
 
 
 
128
 
 
129
  with gr.Blocks(css="""
130
- .container {
131
- display: flex;
132
- flex-direction: column;
133
- align-items: center;
134
- font-family: 'Times New Roman', serif;
135
- }
136
- /* Add this highlight class styling */
137
- .highlight {
138
- background-color: #FFFF00; /* Yellow highlight */
139
- padding: 2px 5px;
140
- border-radius: 5px;
141
- }
142
- #verse-info {
143
- margin-bottom: 20px;
144
- text-align: center;
145
- }
146
- #verses {
147
- display: flex;
148
- flex-direction: row;
149
- justify-content: center;
150
- align-items: flex-start;
151
- gap: 50px;
152
- }
153
- #hebrew-verse {
154
- font-size: 18px;
155
- line-height: 1.5;
156
- margin-bottom: 20px;
157
- text-align: right;
158
- direction: rtl;
159
- }
160
- #english-verse {
161
- font-size: 18px;
162
- line-height: 1.5;
163
- margin-bottom: 20px;
164
- }
165
  """) as iface:
166
  with gr.Row():
167
  timezone_input = gr.Dropdown(
168
- choices=[tz for tz in pytz.common_timezones],
169
- label="Select Your Timezone",
170
- value="UTC" # Set a default timezone
171
  )
172
 
173
  with gr.Row():
@@ -183,9 +214,9 @@ with gr.Blocks(css="""
183
 
184
  # Update the display with verse information and translations when the button is clicked
185
  advance_button.click(
186
- fn=lambda tz: update_tanach_display(datetime.datetime.now(pytz.timezone(tz)).strftime("%H:%M:%S"), tz),
187
- inputs=[timezone_input],
188
- outputs=[verse_info_output, hebrew_verse_output, english_verse_output],
189
  )
190
 
191
  class TestWordIndex(unittest.TestCase):
 
5
  import requests
6
  import pytz
7
  import unittest
8
+ import sqlite3 # Import sqlite3 for database handling
9
 
10
  import gradio as gr
 
 
 
11
  from deep_translator import GoogleTranslator
12
  from deep_translator.exceptions import NotValidLength, RequestError
13
+ from utils import process_json_files, flatten_text_with_line_breaks, calculate_tanach_statistics, build_word_index
14
 
15
  # Set up logging
16
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
19
  TANACH_DATA = process_json_files(1, 39)
20
  WORD_INDEX = build_word_index(TANACH_DATA)
21
 
22
+ # --- Database Setup ---
23
+ # Use a connection function to ensure each thread gets its own connection
24
def get_db_connection(db_path='translation_cache.db'):
    """Open a new SQLite connection to the translation cache.

    A fresh connection is created on every call so that each caller
    (e.g. each request-handling thread) works with its own connection
    instead of sharing one.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            'translation_cache.db', resolved against the current working
            directory. ':memory:' gives a throw-away database.

    Returns:
        An open sqlite3.Connection whose rows are sqlite3.Row objects, so
        columns can be read by name (row['english_text']). The caller is
        responsible for closing it.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row  # Allows accessing columns by name
    return conn
28
+
29
+ # Create the database table if it doesn't exist
30
# One row per (book, chapter) holding the cached English translation.
# Using the connection as a context manager would only commit/roll back the
# transaction, not close the connection, so it is closed explicitly here.
conn = get_db_connection()
try:
    cursor = conn.cursor()
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS translations (
            book_id INTEGER,
            chapter_id INTEGER,
            english_text TEXT,
            PRIMARY KEY (book_id, chapter_id)
        )
    ''')
    conn.commit()
finally:
    conn.close()
41
+
42
  # --- Utility Functions ---
43
 
44
  def get_current_word_data(client_time_str):
 
62
  chapter_text = TANACH_DATA[book_id]["text"][chapter_id]
63
  flattened_chapter = flatten_text_with_line_breaks(chapter_text)
64
 
 
 
 
 
 
65
  return '<br>'.join(flattened_chapter)
66
 
67
def _chunk_lines(text, max_length):
    """Group the lines of *text* into chunks of at most *max_length* characters.

    Lines are never split across chunks, so a verse cannot be cut in half at
    a chunk seam. A single line longer than *max_length* becomes a chunk of
    its own; that is the only case in which a chunk exceeds the limit.
    """
    chunks = []
    current = []
    current_len = 0
    for line in text.split('\n'):
        # +1 accounts for the '\n' that joins this line to the previous one.
        extra = len(line) + (1 if current else 0)
        if current and current_len + extra > max_length:
            chunks.append('\n'.join(current))
            current = []
            current_len = 0
            extra = len(line)
        current.append(line)
        current_len += extra
    # str.split always yields at least one element, so `current` is non-empty.
    chunks.append('\n'.join(current))
    return chunks


def _translate_chunk(translator, chunk, max_length):
    """Translate one chunk, hard-slicing it only if it exceeds *max_length*."""
    if not chunk.strip():
        # Nothing to translate; keep blank lines so line positions stay aligned.
        return chunk
    pieces = [chunk[i:i + max_length] for i in range(0, len(chunk), max_length)]
    # Defensive: treat a None result from the translator as empty text.
    return ' '.join(translator.translate(piece) or '' for piece in pieces)


def translate_chapter(hebrew_chapter, book_id, chapter_id):
    """Translate a Hebrew chapter to English, caching the result in the database.

    The cache is keyed by (book_id, chapter_id). On a cache hit the stored
    text is returned without calling the translator. On a miss the chapter
    is translated in line-aligned chunks, stored, and returned.

    Args:
        hebrew_chapter: The chapter text, one verse per line ('\\n'-separated).
        book_id: Book identifier used as the first half of the cache key.
        chapter_id: Chapter identifier used as the second half of the cache key.

    Returns:
        The English text as a list of lines. If the translation request
        fails, returns ["Translation unavailable: Request Error"] and caches
        nothing, so a later call will retry.
    """
    max_length = 2000  # Slightly below the limit to be safe

    # Each call gets its own connection; close it explicitly because using a
    # sqlite3 connection as a context manager does not close it.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # Check if translation exists in the database
        cursor.execute(
            "SELECT english_text FROM translations WHERE book_id=? AND chapter_id=?",
            (book_id, chapter_id)
        )
        result = cursor.fetchone()
        if result is not None:
            return result['english_text'].split('\n')

        try:
            translator = GoogleTranslator(source='iw', target='en')
            # Chunks end on line boundaries and are re-joined with '\n', so
            # the output keeps one line per input line as long as the
            # translator preserves the newlines inside a chunk
            # (NOTE(review): assumed, as in the original code - confirm).
            translated_text = '\n'.join(
                _translate_chunk(translator, chunk, max_length)
                for chunk in _chunk_lines(hebrew_chapter, max_length)
            )
        except RequestError as e:
            logging.warning(f"Translation failed: Request Error - {e}")
            return ["Translation unavailable: Request Error"]

        # OR REPLACE: if another request cached the same chapter while this
        # one was translating, overwrite instead of raising IntegrityError.
        cursor.execute(
            "INSERT OR REPLACE INTO translations (book_id, chapter_id, english_text) "
            "VALUES (?, ?, ?)",
            (book_id, chapter_id, translated_text)
        )
        conn.commit()

        return translated_text.split('\n')  # Return as list of lines
    finally:
        conn.close()
107
 
108
  def update_tanach_display(client_time_str, timezone):
109
  """Updates the Gradio interface with client time, verse info, and translations."""
 
139
  """
140
 
141
  # Get and format Hebrew and English verses
142
+ hebrew_verse = get_formatted_verse(book_id, chapter_id, verse_id, highlight_word=False)
143
+ hebrew_verses = hebrew_verse.split("<br>")
144
 
145
+ # Translate the entire chapter and get the correct verse
146
+ hebrew_chapter = flatten_text_with_line_breaks(TANACH_DATA[book_id]["text"][chapter_id])
147
+ english_chapter = translate_chapter('\n'.join(hebrew_chapter), book_id, chapter_id)
148
 
149
+ # Highlight the current verse in both Hebrew and English
150
+ hebrew_verses[verse_id - 1] = f"<span class='highlight'>{hebrew_verses[verse_id - 1]}</span>"
151
+ english_chapter[verse_id - 1] = f"<span class='highlight'>{english_chapter[verse_id - 1]}</span>"
152
 
153
+ # Join the verses back with <br> for display
154
+ hebrew_verse = "<br>".join(hebrew_verses)
155
+ english_verse = "<br>".join(english_chapter)
156
+
157
+ return verse_info, hebrew_verse, english_verse
158
 
159
+ # --- Gradio Interface ---
160
  with gr.Blocks(css="""
161
+ .container {
162
+ display: flex;
163
+ flex-direction: column;
164
+ align-items: center;
165
+ font-family: 'Times New Roman', serif;
166
+ }
167
+ /* Add this highlight class styling */
168
+ .highlight {
169
+ background-color: #FFFF00; /* Yellow highlight */
170
+ padding: 2px 5px;
171
+ border-radius: 5px;
172
+ }
173
+ #verse-info {
174
+ margin-bottom: 20px;
175
+ text-align: center;
176
+ }
177
+ #verses {
178
+ display: flex;
179
+ flex-direction: row;
180
+ justify-content: center;
181
+ align-items: flex-start;
182
+ gap: 50px;
183
+ }
184
+ #hebrew-verse {
185
+ font-size: 18px;
186
+ line-height: 1.5;
187
+ margin-bottom: 20px;
188
+ text-align: right;
189
+ direction: rtl;
190
+ }
191
+ #english-verse {
192
+ font-size: 18px;
193
+ line-height: 1.5;
194
+ margin-bottom: 20px;
195
+ }
196
  """) as iface:
197
  with gr.Row():
198
  timezone_input = gr.Dropdown(
199
+ choices=[tz for tz in pytz.common_timezones],
200
+ label="Select Your Timezone",
201
+ value="UTC" # Set a default timezone
202
  )
203
 
204
  with gr.Row():
 
214
 
215
  # Update the display with verse information and translations when the button is clicked
216
  advance_button.click(
217
+ fn=lambda tz: update_tanach_display(datetime.datetime.now(pytz.timezone(tz)).strftime("%H:%M:%S"), tz),
218
+ inputs=[timezone_input],
219
+ outputs=[verse_info_output, hebrew_verse_output, english_verse_output],
220
  )
221
 
222
  class TestWordIndex(unittest.TestCase):
translation_cache.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5785f0efbbb8d92d5fcd3f09ebebfaa18bf67d7cf76e6bf815a68960dca4d806
3
+ size 1363968