Spaces:
Runtime error
Runtime error
| import re | |
| import sqlite3 | |
| from flask import g | |
| from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer | |
# Single checkpoint name shared by the model and the tokenizer so that the two
# are always loaded from the same pretrained weights/vocabulary.
_M2M100_CHECKPOINT = "facebook/m2m100_1.2B"

# Both objects are created once, when this module is imported, and are then
# used as module-level globals by the translation function below.
model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)
tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
def translate_text(input_text, source_language, target_language):
    """Translate a user story with the module-level M2M100 model.

    Args:
        input_text: Text to translate. It must be non-blank and must match one
            of the user-story formats accepted by ``validate_input``.
        source_language: Language code assigned to ``tokenizer.src_lang``.
        target_language: Language code handed to ``tokenizer.get_lang_id`` to
            force the first generated token.

    Returns:
        The first decoded sequence produced by the model, with special tokens
        stripped.

    Raises:
        ValueError: With message ``"Empty input!"`` when ``input_text`` is
            ``None``, empty or whitespace-only, or ``"Incorrect format!"``
            when it does not match any supported user-story pattern.
    """
    # Validate before touching the tokenizer: it is a shared module-level
    # object, so a rejected request must not change its source language.
    if not input_text or not input_text.strip():
        raise ValueError("Empty input!")
    if not validate_input(input_text):
        raise ValueError("Incorrect format!")

    # Tell the tokenizer which language the input is written in.
    tokenizer.src_lang = source_language

    # Encode the input as PyTorch tensors.
    encoded_text = tokenizer(input_text, return_tensors="pt")

    # Generate the translation; forcing the first token to the target
    # language id selects the output language.
    generated_tokens = model.generate(
        **encoded_text,
        forced_bos_token_id=tokenizer.get_lang_id(target_language),
        max_new_tokens=512,
    )

    # Decode the generated ids back into text and return the first sequence.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
def switch(lang):
    """Return the English display name for a language code.

    Helper for showing readable language names in flash messages. The
    recognised codes are "en", "zh", "ms", "ta" and "th"; any other value
    yields ``None``.
    """
    display_names = {
        "en": "English",
        "zh": "Chinese",
        "ms": "Malay",
        "ta": "Tamil",
        "th": "Thai",
    }
    return display_names.get(lang)
# User-story patterns for the five supported languages, compiled once.
#
# The separator between the goal and the benefit clause was originally written
# as ``(,|.)+``. Because the patterns run with re.DOTALL, ``.`` already matches
# every character (including ","), so that group is equivalent to ``.+`` but
# makes the regex engine backtrack exponentially on a run of commas that is not
# followed by the closing keyword. ``.+`` accepts exactly the same strings
# without that blow-up.
# NOTE(review): the separator therefore accepts ANY characters, not just a
# comma or full stop -- presumably ``[,.]`` was intended; confirm before
# tightening, since that would reject input that is accepted today.
_USER_STORY_PATTERNS = tuple(
    re.compile(pattern, flags=re.DOTALL)
    for pattern in (
        # English
        r'As a (?P<role>[^,.]+), I want to (?P<goal>[^,.]+).+so that (?P<benefit>.+)',
        # Chinese (the two separator alternatives do not overlap, so a
        # character class is an exact equivalent)
        r'作为(?P<role>[^,.]+),我想要(?P<goal>[^,.]+)[,。]+以便(?P<benefit>.+)',
        # Malay
        r'Sebagai(?P<role>[^,.]+), saya mahu(?P<goal>[^,.]+).+supaya(?P<benefit>.+)',
        # Tamil
        r'என(?P<role>[^,.]+) எனக்கு வேண்டும்(?P<goal>[^,.]+).+அதனால்(?P<benefit>.+) பயன்படுத்தி வைக்கும்',
        # Thai
        r'ในฐานะ(?P<role>[^,.]+) ฉันต้องการ(?P<goal>[^,.]+).+เพื่อที่ฉัน(?P<benefit>.+)',
    )
)


def validate_input(input_text):
    """Check whether the text contains a user story in a supported format.

    The text is searched (not anchored, so the story may appear anywhere in
    it) for the English, Chinese, Malay, Tamil and Thai user-story templates.

    Args:
        input_text: The string to check.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    return any(pattern.search(input_text) for pattern in _USER_STORY_PATTERNS)
def getTranslatedContents():
    """Return all rows of the "Translation" table, without their unique ids.

    The SQLite connection stored on Flask's ``g`` object as ``_database`` is
    reused when present; otherwise a new connection to 'Refineverse.db' is
    opened and stored there.

    Returns:
        A list of ``(input_text, translated_text)`` tuples.
    """
    connection = getattr(g, '_database', None)
    if connection is None:
        # No connection stored on g yet: open one and keep it there.
        connection = sqlite3.connect('Refineverse.db')
        g._database = connection

    query = "SELECT input_text, translated_text FROM Translation"
    return connection.execute(query).fetchall()
def insertTranslationRow(input_text, translated_text):
    """Insert one row into the "Translation" table of 'Refineverse.db'.

    Args:
        input_text: The original text that was submitted for translation.
        translated_text: The translation to store alongside it.

    Raises:
        sqlite3.Error: If the insert fails; the transaction is rolled back
            and the connection is still closed.
    """
    conn = sqlite3.connect('Refineverse.db')
    try:
        # Using the connection as a context manager only manages the
        # transaction (commit on success, rollback on error); it does NOT
        # close the connection, hence the explicit close() below.
        with conn:
            conn.execute(
                "INSERT INTO Translation (input_text, translated_text) VALUES (?, ?)",
                (input_text, translated_text),
            )
    finally:
        conn.close()