{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Donut fine-tuning and inference on the Booking dataset (Colab)\n",
    "\n",
    "Mounts Google Drive, installs the pinned dependencies, runs `train.py` with `config/train_Booking.yaml`, then loads a trained checkpoint from `result/train_Booking/` and runs inference on a single image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-BZ2HFB9OtWP"
   },
   "outputs": [],
   "source": [
    "# Mount Google Drive and switch to the Donut project folder stored there,\n",
    "# so the relative paths used by the training cell (train.py, ./config/...) resolve.\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')\n",
    "%cd /content/drive/MyDrive/donut"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "background_save": true
    },
    "id": "SJpD4AAj7qeZ"
   },
   "outputs": [],
   "source": [
    "# Install the pinned transformers/timm versions first, then donut-python.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install transformers==4.25.1\n",
    "%pip install timm==0.5.4\n",
    "%pip install donut-python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PxFaO3rfDHQJ"
   },
   "outputs": [],
   "source": [
    "# Imports used by the model-loading and inference cells below.\n",
    "from donut import DonutModel\n",
    "from PIL import Image\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Ro21MdJPSTZs"
   },
   "outputs": [],
   "source": [
    "# Copy one of the default config YAML files and amend it to fit your use case.\n",
    "!python train.py --config ./config/train_Booking.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "J1ITHX4jV2Go"
   },
   "outputs": [],
   "source": [
    "# After training, you can evaluate and use the model.\n",
    "# Point CHECKPOINT_DIR at the result folder of your own training run\n",
    "# (the value below is the folder used by the notebook's author).\n",
    "CHECKPOINT_DIR = \"/content/drive/MyDrive/donut/result/train_Booking/20240327_032854\"\n",
    "\n",
    "model = DonutModel.from_pretrained(CHECKPOINT_DIR)\n",
    "if torch.cuda.is_available():\n",
    "    # GPU available: cast the whole model to half precision and move it to CUDA.\n",
    "    model.half()\n",
    "    device = torch.device(\"cuda\")\n",
    "    model.to(device)\n",
    "else:\n",
    "    # No GPU: only the encoder is cast, to bfloat16; the model stays on the CPU.\n",
    "    model.encoder.to(torch.bfloat16)\n",
    "\n",
    "# Switch to evaluation mode before running inference.\n",
    "model.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "2UhjFTmrWIrX"
   },
   "outputs": [],
   "source": [
    "# Donut picks the task from a start-token prompt of the form <s_{dataset_name}>.\n",
    "# My dataset name is Booking, so the tag is <s_Booking>.\n",
    "# NOTE(review): assumes the training dataset folder is named \"Booking\" - adjust if yours differs.\n",
    "TASK_PROMPT = \"<s_Booking>\"\n",
    "IMAGE_PATH = \"/content/COSCO_000.jpg\"\n",
    "\n",
    "# Open inside a context manager so the file is closed after convert() has\n",
    "# produced the in-memory RGB copy.\n",
    "with Image.open(IMAGE_PATH) as img:\n",
    "    image = img.convert(\"RGB\")\n",
    "\n",
    "# Inference only: no gradients are needed.\n",
    "with torch.no_grad():\n",
    "    output = model.inference(image=image, prompt=TASK_PROMPT)\n",
    "output"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "V100",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}