|
from deep_translator import GoogleTranslator |
|
import torahcodes.resources.func.utils as util |
|
from hebrew_numbers import gematria_to_int |
|
from textblob import TextBlob |
|
from os import listdir |
|
from os.path import isfile, join |
|
import re |
|
import time |
|
import random |
|
import os |
|
import json |
|
|
|
# ANSI escape sequences used to colour terminal output; END resets styling.
BLUE = '\33[1;94m'
RED = '\033[1;91m'
WHITE = '\33[1;97m'
YELLOW = '\33[1;93m'
MAGENTA = '\033[1;35m'
GREEN = '\033[1;32m'
END = '\033[0m'
ORANGE = '\033[1;33m'
|
|
|
|
|
# Directory scanned by BibleBooks.load() for ``*.json`` book files.
data_dir = "resources/texts"


class BibleBooks():
    """In-memory registry of the raw JSON text of every book file.

    ``book`` maps the part of a file name before its first ``.`` to the
    file's undecoded text content.
    """

    def __init__(self):
        self.folder = data_dir
        self.book = {}

    def load(self):
        """Read every ``.json`` file found directly in ``self.folder``.

        Each directory entry name is printed as it is visited. Files are
        decoded as ``utf-8-sig`` so a leading BOM is dropped.
        """
        for entry in listdir(self.folder):
            print(entry)
            # Build the path with join(): plain string concatenation drops
            # the separator when ``folder`` has no trailing slash.
            path = join(self.folder, entry)
            if not (isfile(path) and entry.endswith(".json")):
                continue
            key = entry.split('.')[0]
            with open(path, encoding="utf-8-sig") as handle:
                self.book[key] = handle.read()

    def rawdata(self, bookname):
        """Return the raw text stored for ``bookname``.

        Raises:
            KeyError: if no book with that name has been loaded.
        """
        return self.book[bookname]

    def booklist(self):
        """Return the names of all loaded books as a list."""
        return list(self.book)


# Shared registry instance; it holds no books until ``load()`` is called.
books = BibleBooks()
|
|
|
class Torah():
    """Gematria, translation and equidistant-letter-sequence (ELS) helpers.

    Book data is read through the module-level ``books`` registry, so
    ``loadbooks()`` has to be called before any method that needs book text.
    """

    def __init__(self):
        self.book = ''
        # Latin letter -> numeric value table used by gematria()/gematrix().
        self.gcode = {
            'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 600,
            'k': 10, 'l': 20, 'm': 30, 'n': 40, 'o': 50, 'p': 60, 'q': 70, 'r': 80, 's': 90,
            't': 100, 'u': 200, 'v': 700, 'w': 900, 'x': 300, 'y': 400, 'z': 500
        }

    def loadbooks(self):
        """Load all book files into the shared ``books`` registry."""
        books.load()

    def func_getnumber(self, listL, listW):
        """Return ``util.fn_GetNumberValues(listL, listW)`` unchanged."""
        return util.fn_GetNumberValues(listL, listW)

    def func_checklang(self, word, lang):
        """Return True when ``word`` is detected as language ``lang``.

        Detection is best-effort: if the detector raises for any reason the
        word is accepted (True), matching the previous behaviour.
        """
        blob = TextBlob(word)
        try:
            # Detect once; the result is reused for the comparison.
            detected = blob.detect_language()
        except Exception:
            return True
        return detected == lang

    def numtobook(self, number):
        """Return the first loaded book name whose first number equals ``number``.

        The comparison is textual: the first numeric token found in the book
        name must equal ``str(number)``. Returns None when nothing matches.
        """
        wanted = str(number)
        for name in books.booklist():
            found = re.findall(r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", name)
            # Names without any number are skipped instead of raising.
            if found and found[0] == wanted:
                return name
        return None

    def func_translate(self, lang_in, lang_out, data):
        """Translate the stripped ``data`` from ``lang_in`` to ``lang_out``."""
        translated = GoogleTranslator(source=lang_in, target=lang_out).translate(data.strip())
        return translated

    def gematria(self, word: str) -> int:
        """Return the numeric value of a single word.

        A word made only of decimal digits is returned as that integer.
        Otherwise the result is the sum of the ``gcode`` values of its
        characters plus the sum of its individual decimal digits. Characters
        missing from ``gcode`` (including upper-case letters) count as 0.

        Raises:
            ValueError: if ``word`` cannot be processed (e.g. it is not a str).
        """
        try:
            # isdecimal() (not isdigit()) so int() never sees characters such
            # as superscript digits, which it rejects.
            if word.isdecimal():
                return int(word)
            letters_value = sum(self.gcode.get(char, 0) for char in word)
            digits_value = sum(int(char) for char in word if char.isdecimal())
        except (AttributeError, TypeError, ValueError) as err:
            raise ValueError(f"cannot compute gematria of {word!r}") from err
        return letters_value + digits_value

    def gematrix(self, phrase: str) -> int:
        """Return the numeric value of a whole phrase.

        The phrase is lower-cased, accents are stripped, and every character
        that is not a letter, digit or whitespace is removed. Each remaining
        whitespace-separated token contributes the sum of its ``gcode``
        values when it is purely alphabetic, or its integer value when it is
        purely decimal digits. Mixed letter/digit tokens contribute nothing.
        """
        normalized = self.strip_accents(phrase.lower())
        kept = ''.join(ch for ch in normalized if ch.isalpha() or ch.isdigit() or ch.isspace())

        total_value = 0
        for token in kept.split():
            if token.isalpha():
                total_value += sum(self.gcode.get(ch, 0) for ch in token)
            elif token.isdecimal():
                total_value += int(token)
        return total_value

    def strip_accents(self, s):
        """Return ``s`` with combining marks (Unicode category ``Mn``) removed.

        The text is NFD-decomposed first, so ``'é'`` becomes ``'e'``. A value
        that cannot be normalised (not a str) is returned unchanged.
        """
        # Imported here because the module does not import unicodedata at top
        # level; without it this method silently returned its input.
        import unicodedata

        try:
            decomposed = unicodedata.normalize('NFD', s)
        except TypeError:
            return s
        return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')

    @staticmethod
    def gematria_iw_int(text):
        """Return ``gematria_to_int(text)``.

        Declared static so it can be called on the class or on an instance.
        """
        return gematria_to_int(text)

    def func_ParseTranslation(self, translated, lang, active):
        """Keep the words of ``translated`` that start with a known character.

        ``translated`` is split on single spaces. A word is kept when its
        first character (lower-cased) appears in the allowed alphabet and,
        if ``active == 'true'``, ``func_checklang(word, lang)`` accepts it.

        Returns:
            The kept words, each followed by one space, or ``0`` when no
            word was kept.
        """
        abd = 'abcdefghijklmnñopqrstuvwxyz1234567890'
        kept = []
        for word in translated.split(' '):
            # Consecutive spaces produce empty words; skip them explicitly.
            if not word or word[0].lower() not in abd:
                continue
            if active == 'true' and not self.func_checklang(word, lang):
                continue
            kept.append(word)

        if not kept:
            return 0
        return ''.join(word + ' ' for word in kept)

    def els(self, namebook, number, tracert='false', visualice=False):
        """Pick every ``number``-th character of a book.

        Characters are counted continuously across all verses of the book
        returned by ``GetDataBook(namebook)``. The picks of each verse are
        joined, verses are separated by a space, and runs of whitespace are
        collapsed.

        Args:
            namebook: Book name passed to ``GetDataBook``.
            number: Skip distance; converted with ``int()``.
            tracert: When equal to ``'true'``, each pick is printed and its
                position within the verse is added to the returned total.
            visualice: Currently unused.

        Returns:
            ``(text, totalvalue)``. ``totalvalue`` stays 0 unless tracing is
            on. A zero or non-numeric ``number`` yields ``('', 0)``.
        """
        data = self.GetDataBook(namebook)

        try:
            skip = int(number)
        except (TypeError, ValueError):
            skip = 0
        if skip == 0:
            # Nothing can be selected; previously this case was swallowed by
            # a bare ``except`` and produced the same empty result.
            return '', 0

        tracing = tracert == 'true'
        position = 1
        totalvalue = 0
        pieces = []
        for key in data:
            source, chapter, verse = key
            picked = []
            for charnum, char in enumerate(data[key], start=1):
                if position % skip == 0:
                    if tracing:
                        totalvalue += charnum
                        print('Source:', int(source), 'chapter:', int(chapter), 'Verse:', int(verse), 'CharNum:', int(charnum), 'Char:', char)
                    picked.append(char)
                position += 1
            pieces.append(''.join(picked))

        ret = re.sub(r'\s+', ' ', ' '.join(pieces).strip())
        return ret, totalvalue

    def GetDataBook(self, bibleNumberBook):
        """Return the verse dictionary ``D`` built from a loaded book.

        The raw JSON text of ``bibleNumberBook`` is taken from the ``books``
        registry and run through the ``util`` preprocessing helpers. ``els()``
        iterates the result as a mapping keyed by 3-item tuples.
        """
        JSON = books.rawdata(bibleNumberBook)
        ListOfJSONStringsParsed, ListOfJSONStringsParsedWithSpaces = util.fn_TextFilePreprocess(JSON)
        ListOfDictsOfJSONStringsParsed, ListOfDictsOfJSONStringsParsedWithSpaces = util.fn_ConvertJSONStringsToDicts(ListOfJSONStringsParsed, ListOfJSONStringsParsedWithSpaces)
        SearchTextChosen = util.fn_GetNumberOfTextChosen(ListOfDictsOfJSONStringsParsed)
        ZippedTupleNoSpaces, ZippedTupleWithSpaces = util.fn_ZippedTupleCreate(ListOfDictsOfJSONStringsParsed, ListOfDictsOfJSONStringsParsedWithSpaces, SearchTextChosen)
        D, DS = util.fn_DictionaryOfVersesCreate(ZippedTupleNoSpaces, ZippedTupleWithSpaces)

        # NOTE(review): the results below are never used by this method. The
        # calls are kept because the util helpers are opaque from here;
        # confirm they have no side effects before removing them.
        S, L, DL, D5, ListOfWords = util.fn_DataObjectsCreate(D, DS)
        N, NW = util.fn_GetNumberValues(S, ListOfWords)
        ListOfIndexesCustom = util.fn_ListOfIndexesCustomCreate(D5)
        W = util.fn_TupleOfWordsAndGematriaValuesCreate(ListOfWords, NW)

        return D
|
|
|
|
|
def process_json_files(start, end, step, length=0, tlang="en", spaces_include=False, strip_in_braces=True, strip_diacritics=True):
    """Extract every ``step``-th character from numbered book files and translate it.

    For each ``i`` in ``start..end`` (inclusive) the file
    ``resources/texts/{i:02}.json`` is read. Its ``"text"`` entry is treated
    as a list of blocks; the strings of each block are joined with a space
    and the blocks are concatenated.

    Args:
        start: First book number.
        end: Last book number (inclusive).
        step: Skip distance; characters ``step-1, 2*step-1, ...`` are taken.
        length: When non-zero, the selection is cut to this many characters.
        tlang: Target language passed to ``GoogleTranslator``.
        spaces_include: When False, spaces are removed before selecting.
        strip_in_braces: When True, ``[...]`` spans are removed.
        strip_diacritics: When True, every character outside U+05D0-U+05EA
            and the space is removed.

    Returns:
        A list with one dict per book. Successful entries have the keys
        ``book``, ``title``, ``original_text`` and ``translated_text``;
        failures have a single ``error`` key. Books whose selection is empty
        produce no entry.
    """
    base_path = "resources/texts"
    translator = GoogleTranslator(source='auto', target=tlang)
    results = []

    for i in range(start, end + 1):
        file_name = f"{base_path}/{i:02}.json"
        # Only loading the file is guarded, so errors raised by later steps
        # are not misreported as file problems.
        try:
            with open(file_name, 'r', encoding='utf-8') as file:
                data = json.load(file)
            text_blocks = data["text"]
        except FileNotFoundError:
            results.append({"error": f"File {file_name} not found."})
            continue
        except json.JSONDecodeError:
            results.append({"error": f"File {file_name} could not be read as JSON."})
            continue
        except KeyError:
            results.append({"error": f"Expected key 'text' is missing in {file_name}."})
            continue

        clean_text = ''.join(' '.join(block) for block in text_blocks)
        if strip_in_braces:
            clean_text = re.sub(r"\[.*?\]", "", clean_text, flags=re.DOTALL)
        if strip_diacritics:
            clean_text = re.sub(r"[^\u05D0-\u05EA ]+", "", clean_text)
        if not spaces_include:
            clean_text = clean_text.replace(" ", "")

        selected_characters = clean_text[step - 1::step]
        if length != 0:
            selected_characters = selected_characters[:length]

        # Nothing selected: skip the book without calling the translator.
        if selected_characters == "":
            continue

        # Report a missing title under its own name rather than as 'text'.
        if "title" not in data:
            results.append({"error": f"Expected key 'title' is missing in {file_name}."})
            continue

        results.append({
            "book": i,
            "title": data["title"],
            "original_text": selected_characters,
            "translated_text": translator.translate(selected_characters)
        })

    return results
|
|