Transformers
Italian
English
semantic-search
explainable-ai
faiss
ai-ethics
responsible-ai
llm
prompt-engineering
multimodal-ai
ai-transparency
ethical-intelligence
explainable-llm
cognitive-ai
ethical-ai
scientific-retrieval
modular-ai
memory-augmented-llm
trustworthy-ai
reasoning-engine
ai-alignment
next-gen-llm
thinking-machines
open-source-ai
explainability
ai-research
semantic audit
cognitive agent
human-centered-ai
| # © 2025 Elena Marziali — Code released under Apache 2.0 license. | |
| # See LICENSE in the repository for details. | |
| # Removal of this copyright is prohibited. | |
| # === Text Translation === | |
| # Caching dictionary for previously translated texts | |
| translation_cache = {} | |
| def detect_language(text): | |
| """Detects the language of the loaded text.""" | |
| try: | |
| return detect(text) | |
| except Exception as e: | |
| print(f"Language detection error: {e}") | |
| return "unknown" | |
| def translate_text(text, source_lang, target_lang): | |
| """ Translates the text with debug output to verify correctness. """ | |
| translation_model = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}" | |
| print(f"Using translation model: {translation_model}") | |
| translator = pipeline("translation", model=translation_model) | |
| translation = translator(text)[0]['translation_text'] | |
| print(f"Original text: {text}") | |
| print(f"Translated text: {translation}") | |
| return translation | |
| def extract_text_pdf(file_name): | |
| """ Extracts text from a PDF file. """ | |
| text = "" | |
| with pdfplumber.open(file_name) as pdf: | |
| for page in pdf.pages: | |
| text += page.extract_text() + "\n" | |
| return text.strip() | |
| def extract_text_docx(file_name): | |
| """ Extracts text from a DOCX file. """ | |
| doc = Document(file_name) | |
| text = "\n".join([paragraph.text for paragraph in doc.paragraphs]) | |
| return text.strip() | |
| def save_docx(text, output_file_name): | |
| """ Saves translated text into a DOCX document. """ | |
| doc = Document() | |
| doc.add_paragraph(text) | |
| doc.save(output_file_name) | |
| def extract_text_csv(file_name): | |
| """ Extracts textual content from a CSV file. """ | |
| df = pd.read_csv(file_name) | |
| text = df.astype(str).apply(lambda x: ' '.join(x), axis=1).str.cat(sep='\n') | |
| return text.strip() | |
| def extract_text_tsv(file_name): | |
| """ Extracts textual content from a TSV file. """ | |
| df = pd.read_csv(file_name, sep='\t') | |
| text = df.astype(str).apply(lambda x: ' '.join(x), axis=1).str.cat(sep='\n') | |
| return text.strip() | |
| def handle_file(file_name): | |
| """ Loads the file, detects its language, and lets the user choose a target language for translation. """ | |
| extension = file_name.split('.')[-1].lower() | |
| if extension == "pdf": | |
| text = extract_text_pdf(file_name) | |
| elif extension == "docx": | |
| text = extract_text_docx(file_name) | |
| elif extension == "csv": | |
| text = extract_text_csv(file_name) | |
| elif extension == "tsv": | |
| text = extract_text_tsv(file_name) | |
| else: | |
| return "Unsupported format! Use PDF, DOCX, CSV, or TSV." | |
| original_language = detect_language(text) | |
| print(f"The file was detected in **{original_language}**.") | |
| # List of available languages | |
| available_languages = ["en", "fr", "de", "es", "zh", "ja", "ar", "it"] | |
| # Ask the user for the target language | |
| print(f"Available languages for translation: {', '.join(available_languages)}") | |
| target_language = input("Which language do you want the explanation in? (e.g., 'en' for English, 'fr' for French): ").strip() | |
| if target_language not in available_languages: | |
| print("Error: Unsupported language!") | |
| else: | |
| print(f"The explanation will be translated into {target_language}.") | |
| # Ensure translation is performed | |
| translated_text = translate_text(text, original_language, target_language) | |
| # Save the translated file | |
| translated_file_name = f"translated_{target_language}_{file_name}" | |
| if extension == "pdf": | |
| with open(translated_file_name, "w", encoding="utf-8") as f: | |
| f.write(translated_text) | |
| elif extension == "docx": | |
| save_docx(translated_text, translated_file_name) | |
| return f"Translation completed! Download the file: {translated_file_name}" | |
| # Initialize the dictionary to store journals | |
| journal_store = {} | |
| def save_multilingual_journal(journal_text, journal_id, target_language): | |
| source_language = detect_language(journal_text) | |
| if source_language != target_language: | |
| translated_text = translate_long_text(journal_text, source_lang=source_language, target_lang=target_language) | |
| else: | |
| translated_text = journal_text | |
| journal_store[journal_id] = { | |
| "original": journal_text, | |
| target_language: translated_text | |
| } | |
| embedding = safe_encode(translated_text) | |
| index.add(np.array(embedding, dtype=np.float32)) | |
| def translate_long_text(text, source_lang="it", target_lang="en", max_chars=400): | |
| translation_model = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}" | |
| translator = pipeline("translation", model=translation_model) | |
| blocks = [text[i:i+max_chars] for i in range(0, len(text), max_chars)] | |
| translated = [] | |
| for block in blocks: | |
| try: | |
| output = translator(block)[0]['translation_text'] | |
| translated.append(output) | |
| except Exception as e: | |
| print(f"Error translating block: {e}") | |
| translated.append("[Translation error]") | |
| return "\n".join(translated) | |
| def search_similar_journals(query, target_language, top_k=3): | |
| query_language = detect_language(query) | |
| if query_language != target_language: | |
| translated_query = translate_long_text(query, source_lang=query_language, target_lang=target_language) | |
| else: | |
| translated_query = query | |
| query_emb = safe_encode(translated_query) | |
| query_emb = np.array(query_emb, dtype=np.float32) | |
| if hasattr(index, "is_trained") and not index.is_trained: | |
| print("FAISS index is not trained.") | |
| return [] | |
| D, I = index.search(query_emb, top_k) | |
| results = [] | |
| for i in I[0]: | |
| journal = journal_store.get(i, {}) | |
| results.append(journal.get(target_language, "")) | |
| return results | |
| # === Valid Input Function === | |
| def get_valid_input(message, valid_options=None): | |
| while True: | |
| value = input(message).strip().lower() | |
| if not value: | |
| print("Error! Please enter a valid value.") | |
| elif valid_options and value not in valid_options: | |
| print(f"Error! You must choose from: {', '.join(valid_options)}") | |
| else: | |
| return value |