{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "private_outputs": true,
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rK-kOO01qe9L"
      },
      "outputs": [],
      "source": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "PWV5yLxVqfbx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "V28eHua6qff_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install git+https://github.com/huggingface/transformers"
      ],
      "metadata": {
        "id": "LoXk14QLqflG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -r /content/a.txt"
      ],
      "metadata": {
        "id": "pgdUZsRrrEfJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "Helsinki-NLP/opus-mt-en-ar\n",
        "\n",
        "sdyy/en-ar"
      ],
      "metadata": {
        "id": "uNfZzFhzsjUZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "ssHEG88C1Ali"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "7u7KH1vv1AoZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# convet csv tojsonl"
      ],
      "metadata": {
        "id": "PPFPtqFBPgMx"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import csv\n",
        "import json\n",
        "\n",
        "# Specify the input and output file names\n",
        "csv_file_name = \"english_arabic_dataset.csv\"\n",
        "jsonl_file_name = \"english_arabic_dataset.jsonl\"\n",
        "\n",
        "# Read the CSV file and convert each row to a dictionary\n",
        "data = []\n",
        "with open(csv_file_name, mode='r', encoding='utf-8') as csv_file:\n",
        "    csv_reader = csv.DictReader(csv_file)\n",
        "    for row in csv_reader:\n",
        "        translation = row[\"translation\"]\n",
        "        # Assuming the translation is formatted as \"English sentence\",\"Arabic sentence\"\n",
        "        if ',' in translation:\n",
        "            english, arabic = translation.split(',', 1)\n",
        "            data.append({\"English\": english.strip('\"'), \"translation\": arabic.strip('\"')})\n",
        "        else:\n",
        "            # Handle cases where there is no comma\n",
        "            data.append({\"English\": translation.strip('\"'), \"translation\": \"\"})\n",
        "\n",
        "# Write the data to a JSON Lines file\n",
        "with open(jsonl_file_name, mode='w', encoding='utf-8') as jsonl_file:\n",
        "    for item in data:\n",
        "        jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\\n')\n",
        "\n",
        "print(f\"JSON Lines file '{jsonl_file_name}' has been created successfully.\")\n"
      ],
      "metadata": {
        "id": "23eu4ZLo1ArU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## dowload dataset"
      ],
      "metadata": {
        "id": "x1BkM6-5Pzx4"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "dataset = load_dataset('wmt16', 'ro-en')"
      ],
      "metadata": {
        "id": "7ZRJyRGQCizE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "source": [
        "!wget https://raw.githubusercontent.com/Helsinki-NLP/opus-mt-en-ar/master/run_translation.py"
      ],
      "cell_type": "code",
      "metadata": {
        "id": "LOBTHjmd80ZT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!wget https://huggingface.co/datasets/wmt/wmt16/tree/main/ro-en"
      ],
      "metadata": {
        "id": "zMCXLEIm-Kpl"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "source": [
        "import csv\n",
        "import json\n",
        "\n",
        "# Specify the input and output file names\n",
        "csv_file_name = \"english_arabic_dataset.csv\"\n",
        "jsonl_file_name = \"english_arabic_dataset.jsonl\"\n",
        "\n",
        "# Read the CSV file and convert each row to a dictionary\n",
        "# Read the CSV file and convert each row to a dictionary\n",
        "data = []\n",
        "with open(csv_file_name, mode='r', encoding='utf-8') as csv_file:\n",
        "    csv_reader = csv.DictReader(csv_file)\n",
        "    for row in csv_reader:\n",
        "        english = row[\"English\"]\n",
        "        arabic = row.get(\"Arabic\", \"\")  # Use get() to avoid KeyError\n",
        "        data.append({\"translation\": {\"en\": english, \"ar\": arabic}})\n",
        "\n",
        "print(f\"JSON Lines file '{jsonl_file_name}' has been created successfully.\")"
      ],
      "cell_type": "code",
      "metadata": {
        "id": "Thf3r5Fm2IV5"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "Byy-EbCC4OZE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "1F9zNSTLQBww"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "تسجيل فى huggingface"
      ],
      "metadata": {
        "id": "RJqZKae5QB7R"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!huggingface-cli login"
      ],
      "metadata": {
        "id": "5aR5f2h04Ofj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "Oi1KWQR-1AxC"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "kg6Cqg0k1Azl"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "O8G9DSTM0zyW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "jlJyvpJ90z9a"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import csv\n",
        "import json\n",
        "\n",
        "# Specify the input and output file names\n",
        "csv_file_name = \"english_arabic_dataset.csv\"\n",
        "jsonl_file_name = \"english_arabic_dataset.jsonl\"\n",
        "\n",
        "# Read the CSV file and convert each row to a dictionary\n",
        "data = []\n",
        "with open(csv_file_name, mode='r', encoding='utf-8') as csv_file:\n",
        "    csv_reader = csv.DictReader(csv_file)\n",
        "    for row in csv_reader:\n",
        "        translation = row[\"translation\"]\n",
        "        # Check if the row contains a comma\n",
        "        if ',' in translation:\n",
        "            english, arabic = translation.split(',', 1)\n",
        "            data.append({\"English\": english.strip('\"'), \"translation\": arabic.strip('\"')})\n",
        "        else:\n",
        "            # Handle cases where there is no comma\n",
        "            data.append({\"English\": translation.strip('\"'), \"translation\": \"\"})\n",
        "\n",
        "# Write the data to a JSON Lines file\n",
        "with open(jsonl_file_name, mode='w', encoding='utf-8') as jsonl_file:\n",
        "    for item in data:\n",
        "        jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\\n')\n",
        "\n",
        "print(f\"JSON Lines file '{jsonl_file_name}' has been created successfully.\")\n"
      ],
      "metadata": {
        "id": "dnMfgJzWwC9I"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "oltCt7rewDAB"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "0FhbGKQ7wDDA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "QSs3f3SEu3Da"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "3VgjiZouu3Xo"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "create csv data"
      ],
      "metadata": {
        "id": "_N9OQERxQSrI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import csv\n",
        "\n",
        "# Sample dataset with 50 English-Arabic sentence pairs\n",
        "data = [\n",
        "    (\"Hello, how are you?\", \"مرحباً، كيف حالك؟\"),\n",
        "    (\"I am learning how to code.\", \"أنا أتعلم كيفية البرمجة.\"),\n",
        "    (\"This is a test sentence.\", \"هذه جملة اختبار.\"),\n",
        "    (\"The weather is nice today.\", \"الطقس جميل اليوم.\"),\n",
        "    (\"I like to read books.\", \"أحب قراءة الكتب.\"),\n",
        "    (\"What is your name?\", \"ما اسمك؟\"),\n",
        "    (\"I live in a big city.\", \"أعيش في مدينة كبيرة.\"),\n",
        "    (\"Do you speak Arabic?\", \"هل تتحدث العربية؟\"),\n",
        "    (\"I have a pet cat.\", \"لدي قطة أليفة.\"),\n",
        "    (\"The sun is shining brightly.\", \"الشمس تشرق بسطوع.\"),\n",
        "    (\"I enjoy cooking.\", \"أستمتع بالطهي.\"),\n",
        "    (\"He is a good friend.\", \"هو صديق جيد.\"),\n",
        "    (\"She loves to travel.\", \"هي تحب السفر.\"),\n",
        "    (\"We are going to the park.\", \"نحن ذاهبون إلى الحديقة.\"),\n",
        "    (\"They are playing soccer.\", \"هم يلعبون كرة القدم.\"),\n",
        "    (\"I need to buy groceries.\", \"أحتاج لشراء البقالة.\"),\n",
        "    (\"My favorite color is blue.\", \"لوني المفضل هو الأزرق.\"),\n",
        "    (\"I will call you tomorrow.\", \"سأتصل بك غداً.\"),\n",
        "    (\"Please turn off the lights.\", \"من فضلك أغلق الأنوار.\"),\n",
        "    (\"Can you help me?\", \"هل يمكنك مساعدتي؟\"),\n",
        "    (\"I am very happy today.\", \"أنا سعيد جداً اليوم.\"),\n",
        "    (\"The movie was interesting.\", \"الفيلم كان مثيراً.\"),\n",
        "    (\"We are studying for the exam.\", \"نحن ندرس للامتحان.\"),\n",
        "    (\"I like listening to music.\", \"أحب الاستماع إلى الموسيقى.\"),\n",
        "    (\"She is reading a novel.\", \"هي تقرأ رواية.\"),\n",
        "    (\"He works in an office.\", \"هو يعمل في مكتب.\"),\n",
        "    (\"They are building a house.\", \"هم يبنون منزلاً.\"),\n",
        "    (\"The car is parked outside.\", \"السيارة متوقفة بالخارج.\"),\n",
        "    (\"I enjoy learning new languages.\", \"أستمتع بتعلم لغات جديدة.\"),\n",
        "    (\"The cake tastes delicious.\", \"الكعكة طعمها لذيذ.\"),\n",
        "    (\"We are planning a trip.\", \"نحن نخطط لرحلة.\"),\n",
        "    (\"She is a talented artist.\", \"هي فنانة موهوبة.\"),\n",
        "    (\"He is watching TV.\", \"هو يشاهد التلفاز.\"),\n",
        "    (\"I forgot my keys.\", \"لقد نسيت مفاتيحي.\"),\n",
        "    (\"The book is on the table.\", \"الكتاب على الطاولة.\"),\n",
        "    (\"I need to charge my phone.\", \"أحتاج لشحن هاتفي.\"),\n",
        "    (\"We are having dinner.\", \"نحن نتناول العشاء.\"),\n",
        "    (\"He is writing a letter.\", \"هو يكتب رسالة.\"),\n",
        "    (\"She is singing a song.\", \"هي تغني أغنية.\"),\n",
        "    (\"The children are playing.\", \"الأطفال يلعبون.\"),\n",
        "    (\"I am drinking coffee.\", \"أنا أشرب القهوة.\"),\n",
        "    (\"The plane is taking off.\", \"الطائرة تقلع.\"),\n",
        "    (\"We are visiting our grandparents.\", \"نحن نزور أجدادنا.\"),\n",
        "    (\"He is wearing a suit.\", \"هو يرتدي بدلة.\"),\n",
        "    (\"She is cooking dinner.\", \"هي تطبخ العشاء.\"),\n",
        "    (\"I am feeling tired.\", \"أشعر بالتعب.\"),\n",
        "    (\"The dog is barking.\", \"الكلب ينبح.\"),\n",
        "    (\"I am going to bed.\", \"أنا ذاهب للنوم.\"),\n",
        "    (\"We are celebrating his birthday.\", \"نحن نحتفل بعيد ميلاده.\"),\n",
        "    (\"She is studying medicine.\", \"هي تدرس الطب.\")\n",
        "]\n",
        "\n",
        "# Specify the file name\n",
        "file_name = \"english_arabic_dataset.csv\"\n",
        "\n",
        "# Write to CSV file\n",
        "with open(file_name, mode='w', newline='', encoding='utf-8') as file:\n",
        "    writer = csv.writer(file)\n",
        "    writer.writerow([\"English\", \"translation\"])  # Write the header\n",
        "    writer.writerows(data)  # Write the data\n",
        "\n",
        "print(f\"CSV file '{file_name}' has been created successfully.\")\n"
      ],
      "metadata": {
        "id": "Rl4x1e_Uu3ax"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "Tq3T37lWEhil"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "4Rn8aFUzEhll"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "CRBuxZLBEho_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "mOxewk1EEhr0"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "شغال"
      ],
      "metadata": {
        "id": "9zn-QR7JGkAt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!python /content/run_translation.py \\\n",
        "    --model_name_or_path Helsinki-NLP/opus-mt-en-ro \\\n",
        "    --do_train \\\n",
        "    --do_eval \\\n",
        "    --source_lang en \\\n",
        "    --target_lang ro \\\n",
        "    --dataset_name wmt16 \\\n",
        "    --dataset_config_name ro-en \\\n",
        "    --output_dir /content/tst-translation \\\n",
        "    --per_device_train_batch_size=4 \\\n",
        "    --per_device_eval_batch_size=4 \\\n",
        "    --overwrite_output_dir \\\n",
        "    --predict_with_generate"
      ],
      "metadata": {
        "id": "FqDQmY9WEht4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ش"
      ],
      "metadata": {
        "id": "85ja_1nzGmvx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "شغال"
      ],
      "metadata": {
        "id": "MU3-tXnVGnxn"
      }
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "6BNO7VXXGpOL"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!python /content/run_translation.py \\\n",
        "    --model_name_or_path facebook/mbart-large-en-ro  \\\n",
        "    --do_train \\\n",
        "    --do_eval \\\n",
        "    --dataset_name wmt16 \\\n",
        "    --dataset_config_name ro-en \\\n",
        "    --source_lang en_XX \\\n",
        "    --target_lang ro_RO \\\n",
        "    --output_dir /tmp/tst-translation \\\n",
        "    --per_device_train_batch_size=4 \\\n",
        "    --per_device_eval_batch_size=4 \\\n",
        "    --overwrite_output_dir \\\n",
        "    --predict_with_generate"
      ],
      "metadata": {
        "id": "ibqJuCE7EwHa"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "dataset = load_dataset(\"sdyy/en-ar\")"
      ],
      "metadata": {
        "id": "vOCRRv6CGyRm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "eAdw0rvSNI84"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "كودنجح ف التدريب"
      ],
      "metadata": {
        "id": "hrR3WcbhNJKW"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# كود نجح ف التدريب"
      ],
      "metadata": {
        "id": "oQYg39oiQe6h"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!python /content/run_translation.py \\\n",
        "    --model_name_or_path Helsinki-NLP/opus-mt-en-ar \\\n",
        "    --do_train \\\n",
        "    --do_eval \\\n",
        "    --source_lang en \\\n",
        "    --target_lang ar \\\n",
        "    --source_prefix \"translate English to Arabic: \" \\\n",
        "    --dataset_name sdyy/en-ar \\\n",
        "    --dataset_config_name default \\\n",
        "    --train_file train_file.jsonl \\\n",
        "    --validation_file validation_dataset.jsonl \\\n",
        "    --output_dir /content/tst-translation \\\n",
        "    --per_device_train_batch_size 4 \\\n",
        "    --per_device_eval_batch_size 4 \\\n",
        "    --overwrite_output_dir \\\n",
        "    --predict_with_generate\n"
      ],
      "metadata": {
        "id": "P0daa-JoL1kJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "efOp8eT2Mw13"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "r6JXWBi5Mw4g"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import MarianMTModel, MarianTokenizer\n",
        "\n",
        "# تحميل نموذج الترجمة\n",
        "model_name = \"Helsinki-NLP/opus-mt-en-ar\"  # تحديد نموذج الترجمة الذي يترجم من الإنجليزية إلى العربية\n",
        "model = MarianMTModel.from_pretrained(model_name)\n",
        "tokenizer = MarianTokenizer.from_pretrained(model_name)\n",
        "\n",
        "# الجملة التي تريد ترجمتها\n",
        "sentence = \"Others have dismissed him as a joke.\"\n",
        "\n",
        "# تحويل الجملة إلى توكنات\n",
        "inputs = tokenizer(sentence, return_tensors=\"pt\", padding=True, truncation=True)\n",
        "\n",
        "# الترجمة\n",
        "translated = model.generate(**inputs)\n",
        "translated_sentence = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]\n",
        "\n",
        "print(\"الجملة المترجمة:\", translated_sentence)\n"
      ],
      "metadata": {
        "id": "OnvrW8gOMw7q"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "JpnN5jvhNxdX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### طلب نجح ترجمة جملة من الداتاسيت"
      ],
      "metadata": {
        "id": "Gpbx9jHyQoV2"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import MarianMTModel, MarianTokenizer\n",
        "\n",
        "# تحميل نموذج الترجمة\n",
        "model_path = \"/content/tst-translation/checkpoint-9\"\n",
        "model = MarianMTModel.from_pretrained(model_path)\n",
        "tokenizer = MarianTokenizer.from_pretrained(model_path)\n",
        "\n",
        "# الجملة التي تريد ترجمتها\n",
        "sentence = \"Others have dismissed him as a joke.\"\n",
        "\n",
        "# تحويل الجملة إلى توكنات\n",
        "inputs = tokenizer(sentence, return_tensors=\"pt\", padding=True, truncation=True)\n",
        "\n",
        "# الترجمة\n",
        "translated = model.generate(**inputs)\n",
        "translated_sentence = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]\n",
        "\n",
        "print(\"الجملة المترجمة:\", translated_sentence)\n"
      ],
      "metadata": {
        "id": "iqUc-K5sNkH4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "vjiaStGGNkK_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "BaSd47OSNkOB"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "AIhWySn5NkQ3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "jcIT8JXBNkTo"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "pL5I0XNDNkV-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import tensorflow as tf\n",
        "from tensorflow.keras.preprocessing.text import Tokenizer\n",
        "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
        "\n",
        "# البيانات المعطاة\n",
        "data = [\n",
        "    {\"translation\": {\"en\": \"Others have dismissed him as a joke.\", \"ar\": \"اعتبره البعض مجرد مزحة.\"}},\n",
        "    {\"translation\": {\"en\": \"And some are holding out for an implosion.\", \"ar\": \"وبعضهم ينتظر الانهيار الداخلي.\"}},\n",
        "    {\"translation\": {\"en\": \"She dismissed his concerns as trivial.\", \"ar\": \"اعتبرت مخاوفه تافهة.\"}},\n",
        "    {\"translation\": {\"en\": \"Don't dismiss his ideas outright; they might have some merit.\", \"ar\": \"لا تستهتر بأفكاره مباشرة؛ فقد تكون لها قيمة.\"}},\n",
        "    {\"translation\": {\"en\": \"The jury dismissed the case due to lack of evidence.\", \"ar\": \"رفضت المحكمة القضية بسبب عدم وجود أدلة.\"}}\n",
        "]\n",
        "\n",
        "# استخراج الجمل بالإنجليزية والعربية\n",
        "english_sentences = [item[\"translation\"][\"en\"] for item in data]\n",
        "arabic_sentences = [item[\"translation\"][\"ar\"] for item in data]\n",
        "\n",
        "# تحديد معلمات التدريب\n",
        "max_words = 1000\n",
        "max_sequence_length = 100\n",
        "\n",
        "# ترميز الجمل\n",
        "tokenizer_en = Tokenizer(num_words=max_words)\n",
        "tokenizer_en.fit_on_texts(english_sentences)\n",
        "tokenizer_ar = Tokenizer(num_words=max_words)\n",
        "tokenizer_ar.fit_on_texts(arabic_sentences)\n",
        "\n",
        "# تحويل الجمل إلى تسلسلات من الأرقام\n",
        "sequences_en = tokenizer_en.texts_to_sequences(english_sentences)\n",
        "sequences_ar = tokenizer_ar.texts_to_sequences(arabic_sentences)\n",
        "\n",
        "# ملء التسلسلات بصفر حتى يكون لدينا جمل بنفس الطول\n",
        "padded_sequences_en = pad_sequences(sequences_en, maxlen=max_sequence_length)\n",
        "padded_sequences_ar = pad_sequences(sequences_ar, maxlen=max_sequence_length)\n",
        "\n",
        "# بناء نموذج الترجمة\n",
        "embedding_dim = 16\n",
        "model = tf.keras.Sequential([\n",
        "    tf.keras.layers.Embedding(max_words, embedding_dim, input_length=max_sequence_length),\n",
        "    tf.keras.layers.GlobalAveragePooling1D(),\n",
        "    tf.keras.layers.Dense(16, activation='relu'),\n",
        "    tf.keras.layers.Dense(max_words, activation='softmax')\n",
        "])\n",
        "\n",
        "# تحديد معلمات التدريب\n",
        "model.compile(optimizer='adam',\n",
        "              loss='sparse_categorical_crossentropy',\n",
        "              metrics=['accuracy'])\n",
        "\n",
        "# تدريب النموذج\n",
        "model.fit(padded_sequences_ar, padded_sequences_en, epochs=10, validation_split=0.2)\n",
        "\n",
        "# تقييم أداء النموذج\n",
        "# eval_loss, eval_accuracy = model.evaluate(padded_sequences_ar, padded_sequences_en)\n",
        "# print(f'Evaluation Loss: {eval_loss}, Evaluation Accuracy: {eval_accuracy}')\n"
      ],
      "metadata": {
        "id": "MdGsqm-4Mw-Q"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}