# !pip install googletrans==3.1.0a0
# !pip install transformers sentencepiece
"""Hindi <-> English translation helpers and a small demo script.

Three back ends are wrapped here:

* googletrans (``translate_hi2en_gtrans`` / ``translate_en2hi_gtrans``),
* Helsinki-NLP Marian checkpoints (``translate_en_hi_transformer`` /
  ``translate_hi_en_transformer``),
* the many-to-many mBART-50 checkpoint (``translate_mbart``).

Clients, models and tokenizers are created lazily on first use and then
reused, so repeated calls do not reload them.
"""

from functools import lru_cache

from googletrans import Translator
# Transformer-based pre-trained language translation models.
from transformers import MarianMTModel, MarianTokenizer
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Checkpoint identifiers passed to ``from_pretrained``.
_MARIAN_EN_HI = "Helsinki-NLP/opus-mt-en-hi"  # English to Hindi translation model
_MARIAN_HI_EN = "Helsinki-NLP/opus-mt-hi-en"  # Hindi to English translation model
_MBART_MANY_TO_MANY = "facebook/mbart-large-50-many-to-many-mmt"

# Columns of the question/answer CSV that the demo translates.
_QNA_COLUMNS = ['Question1', 'Answer1', 'Question2', 'Answer2',
                'Question3', 'Answer3', 'Question4', 'Answer4']


@lru_cache(maxsize=None)
def _get_gtranslator():
    """Return one shared googletrans ``Translator`` (created on first use)."""
    return Translator()


@lru_cache(maxsize=None)
def _load_marian(model_name):
    """Load and cache the Marian ``(tokenizer, model)`` pair for *model_name*.

    Only the two module-level checkpoint names are ever passed in, so the
    cache stays small.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


@lru_cache(maxsize=None)
def _load_mbart():
    """Load and cache the mBART-50 ``(model, tokenizer)`` pair."""
    model = MBartForConditionalGeneration.from_pretrained(_MBART_MANY_TO_MANY)
    tokenizer = MBart50TokenizerFast.from_pretrained(_MBART_MANY_TO_MANY)
    return model, tokenizer


def _translate_marian(text, model_name):
    """Translate *text* with the Marian checkpoint *model_name*.

    Returns the first decoded sequence with special tokens stripped.
    """
    tokenizer, model = _load_marian(model_name)
    encoded = tokenizer(text, return_tensors="pt")
    translated = model.generate(**encoded)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]


def translate_hi2en_gtrans(sentence):
    """
    Function to translate from Hindi to English.

    Args:
    - sentence: string in Hindi

    Returns:
    - English translated text string
    """
    output = _get_gtranslator().translate(sentence, dest='en', src='hi')
    return output.text


def translate_en2hi_gtrans(sentence):
    """
    Function to translate from English to Hindi.

    Args:
    - sentence: string in English

    Returns:
    - Hindi translated text string
    """
    output = _get_gtranslator().translate(sentence, dest='hi', src='en')
    return output.text


def translate_en_hi_transformer(text):
    """
    Translate English text to Hindi using the pre-trained Marian model.

    Args:
    - text: string in English

    Returns:
    - Hindi translated text string
    """
    return _translate_marian(text, _MARIAN_EN_HI)


def translate_hi_en_transformer(text):
    """
    Translate Hindi text to English using the pre-trained Marian model.

    Args:
    - text: string in Hindi

    Returns:
    - English translated text string
    """
    return _translate_marian(text, _MARIAN_HI_EN)


def translate_mbart(text, source_lang, target_lang):
    """
    Translate text between two languages using the mBART-50 model.

    Args:
    - text: string to translate
    - source_lang: language code of ``text`` as understood by the mBART
      tokenizer (the demo below uses "hi_IN" and "en_XX")
    - target_lang: language code to translate into; it is looked up in the
      tokenizer's ``lang_code_to_id`` table, so an unknown code fails there

    Returns:
    - translated text string
    """
    model, tokenizer = _load_mbart()

    # The tokenizer is shared between calls, so the source language is
    # (re)set on every call before encoding.
    tokenizer.src_lang = source_lang
    encoded_text = tokenizer(text, return_tensors="pt")

    # Force the decoder to start with the target language token.
    forced_bos_token_id = tokenizer.lang_code_to_id[target_lang]
    generated_tokens = model.generate(**encoded_text, forced_bos_token_id=forced_bos_token_id)

    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]


def _translate_cell_en2hi(value):
    """Translate one DataFrame cell to Hindi, leaving non-text cells as-is.

    Missing CSV cells are not strings; passing them to the translator would
    fail, so anything that is not a non-blank string is returned unchanged.
    """
    if not isinstance(value, str) or not value.strip():
        return value
    return translate_en2hi_gtrans(value)


def main():
    """Run the demo: googletrans samples, CSV translation, Marian and mBART samples."""
    # pandas is only needed by the demo, so it is imported here.
    import pandas as pd

    print(translate_hi2en_gtrans("मैं खुश हूँ!!!"))
    print(translate_en2hi_gtrans("I am happy!!!"))

    # Read CSV file into a Pandas DataFrame
    df_en = pd.read_csv('Data_with_QnA.csv', usecols=_QNA_COLUMNS)
    print(df_en.head())

    # Convert all the question answers from English to Hindi.
    # DataFrame.map replaces the deprecated applymap in newer pandas releases;
    # fall back to applymap where map is not available.
    apply_cellwise = getattr(df_en, "map", None) or df_en.applymap
    df_hi = apply_cellwise(_translate_cell_en2hi)
    print(df_hi.head())

    # Save the modified DataFrame to a CSV file
    df_hi.to_csv('Hindi_QnA.csv', index=False)

    # English to Hindi example
    english_text = " What is the material used to create the chess set?"
    hindi_translation = translate_en_hi_transformer(english_text)
    print(f"English: {english_text}")
    print(f"Hindi: {hindi_translation}")

    # Hindi to English example
    hindi_text = "आपका दिन कैसा चल रहा है?"  # How is your day going?
    english_translation = translate_hi_en_transformer(hindi_text)
    print(f"Hindi: {hindi_text}")
    print(f"English: {english_translation}")

    # Example usage of the mBART model: Hindi to English
    hindi_text = "हिन्दी साहित्य पर अगर समुचित परिप्रेक्ष्य में विचार किया जाए तो स्पष्ट होता है कि हिन्दी साहित्य का इतिहास अत्यन्त विस्तृत व प्राचीन है। सुप्रसिद्ध भाषा वैज्ञानिक डॉ० हरदेव बाहरी के शब्दों में, हिन्दी साहित्य का इतिहास वस्तुतः वैदिक काल से आरम्भ होता है। यह कहना ही ठीक होगा कि वैदिक भाषा ही हिन्दी है। इस भाषा का दुर्भाग्य रहा है कि युग-युग में इसका नाम परिवर्तित होता रहा है। कभी 'वैदिक', कभी 'संस्कृत', कभी 'प्राकृत', कभी'अपभ्रंश' और अब - हिन्दी।[1] आलोचक कह सकते हैं कि 'वैदिक संस्कृत' और 'हिन्दी' में तो जमीन-आसमान का अन्तर है। पर ध्यान देने योग्य है कि हिब्रू, रूसी, चीनी, जर्मन और तमिल आदि जिन भाषाओं को 'बहुत पुरानी' बताया जाता है, उनके भी प्राचीन और वर्तमान रूपों में जमीन-आसमान का अन्तर है; पर लोगों ने उन भाषाओं के नाम नहीं बदले और उनके परिवर्तित स्वरूपों को 'प्राचीन', 'मध्यकालीन', 'आधुनिक' आदि कहा गया, जबकि 'हिन्दी' के सन्दर्भ में प्रत्येक युग की भाषा का नया नाम रखा जाता रहा।"
    english_translation = translate_mbart(hindi_text, "hi_IN", "en_XX")
    print(english_translation)

    # Example usage of the mBART model: English to Hindi
    english_text = "English literature, the body of written works produced in the English language by inhabitants of the British Isles (including Ireland) from the 7th century to the present day. The major literatures written in English outside the British Isles are treated separately under American literature, Australian literature, Canadian literature, and New Zealand literature. English literature has sometimes been stigmatized as insular. It can be argued that no single English novel attains the universality of the Russian writer Leo Tolstoy’s War and Peace or the French writer Gustave Flaubert’s Madame Bovary. Yet in the Middle Ages the Old English literature of the subjugated Saxons was leavened by the Latin and Anglo-Norman writings, eminently foreign in origin, in which the churchmen and the Norman conquerors expressed themselves. From this combination emerged a flexible and subtle linguistic instrument exploited by Geoffrey Chaucer and brought to supreme application by William Shakespeare. During the Renaissance the renewed interest in Classical learning and values had an important effect on English literature, as on all the arts; and ideas of Augustan literary propriety in the 18th century and reverence in the 19th century for a less specific, though still selectively viewed, Classical antiquity continued to shape the literature. All three of these impulses derived from a foreign source, namely the Mediterranean basin. The Decadents of the late 19th century and the Modernists of the early 20th looked to continental European individuals and movements for inspiration. Nor was attraction toward European intellectualism dead in the late 20th century, for by the mid-1980s the approach known as structuralism, a phenomenon predominantly French and German in origin, infused the very study of English literature itself in a host of published critical studies and university departments. Additional influence was exercised by deconstructionist analysis, based largely on the work of French philosopher Jacques Derrida."
    hindi_translation = translate_mbart(english_text, "en_XX", "hi_IN")
    print(hindi_translation)


if __name__ == "__main__":
    main()