{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "L5U1ACZZBxfh"
   },
   "source": [
    "1. Download the donut folder from GitHub: https://github.com/clovaai/donut\n",
    "2. Copy a config file into the folder and rename it to hold your configuration.\n",
    "3. Place your dataset (train, validation, test) along with the JSONL files in the dataset folder.\n",
    "4. Refer to donut_training.ipynb to train your model. Use an A-100/V-100 GPU to avoid troublesome settings / slow training time.\n",
    "5. Run the trained model using this ipynb file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-BZ2HFB9OtWP"
   },
   "outputs": [],
   "source": [
    "# Mount Google Drive and change into the donut project folder.\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')\n",
    "%cd /content/drive/MyDrive/donut"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "SJpD4AAj7qeZ"
   },
   "outputs": [],
   "source": [
    "# Install all necessary modules. Don't change the version numbers!\n",
    "# %pip (not !pip) installs into the kernel's own environment.\n",
    "%pip install transformers==4.25.1\n",
    "%pip install timm==0.5.4\n",
    "%pip install donut-python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "gSatjcDn5S89"
   },
   "outputs": [],
   "source": [
    "# Import the necessary modules.\n",
    "from donut import DonutModel\n",
    "from PIL import Image\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dyOv9Omo8dJU"
   },
   "outputs": [],
   "source": [
    "# Smoke-test the trained model on the test split (also verifies the checkpoint loads).\n",
    "!python test.py --task_name Booking --dataset_name_or_path dataset/Booking --pretrained_model_name_or_path ./result/train_Booking/donut-booking-extract"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dFfm72T93Z8G"
   },
   "outputs": [],
   "source": [
    "# Load the trained checkpoint and run inference on a single image.\n",
    "model = DonutModel.from_pretrained(\"./result/train_Booking/donut-booking-extract\")\n",
    "if torch.cuda.is_available():\n",
    "    model.half()  # fp16 inference on GPU\n",
    "    device = torch.device(\"cuda\")\n",
    "    model.to(device)\n",
    "else:\n",
    "    model.encoder.to(torch.bfloat16)  # CPU fallback, bf16 encoder\n",
    "\n",
    "model.eval()\n",
    "\n",
    "image = Image.open(\"/content/drive/MyDrive/donut/test/4.jpg\").convert(\"RGB\")\n",
    "\n",
    "with torch.no_grad():\n",
    "    # Donut expects the task start token as the prompt (f\"<s_{task_name}>\",\n",
    "    # matching --task_name Booking above); an empty prompt gives the decoder\n",
    "    # no start token and yields broken output.\n",
    "    output = model.inference(image=image, prompt=\"<s_Booking>\")\n",
    "output"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "V100",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}