{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "deep-text-recognition-benchmark.ipynb", "provenance": [], "collapsed_sections": [], "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [
{ "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] },
{ "cell_type": "markdown", "metadata": { "id": "dHbHK60Tqn2c", "colab_type": "text" }, "source": [ "This can be executed in https://colab.research.google.com \"Python 3 / GPU\" runtime." ] },
{ "cell_type": "code", "metadata": { "id": "Pi-PA14AhdK-", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 153 }, "outputId": "5d620547-6ce9-42b9-8206-20f88f23e567" }, "source": [
 "!git clone https://github.com/clovaai/deep-text-recognition-benchmark\n",
 "%cd deep-text-recognition-benchmark"
 ], "execution_count": 3, "outputs": [ { "output_type": "stream", "text": [
 "Cloning into 'deep-text-recognition-benchmark'...\n",
 "remote: Enumerating objects: 40, done.\u001b[K\n",
 "remote: Counting objects: 100% (40/40), done.\u001b[K\n",
 "remote: Compressing objects: 100% (35/35), done.\u001b[K\n",
 "remote: Total 376 (delta 20), reused 13 (delta 5), pack-reused 336\u001b[K\n",
 "Receiving objects: 100% (376/376), 2.43 MiB | 2.61 MiB/s, done.\n",
 "Resolving deltas: 100% (222/222), done.\n",
 "/content/deep-text-recognition-benchmark\n"
 ], "name": "stdout" } ] },
{ "cell_type": "markdown", "metadata": { "id": "jX-ucvimlaFZ", "colab_type": "text" }, "source": [ "Next, download the large model files from Google Drive, using this hack: https://stackoverflow.com/questions/20665881/direct-download-from-google-drive-using-google-drive-api/32742700#32742700" ] },
{ "cell_type": "code", "metadata": { "id": "9eEhhPBshkjr", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 833 }, "outputId": "205d0bd8-340f-4e9b-ebfd-e9c8f7d327a0" }, "source": [
 "# Local checkpoint file name -> Google Drive share link; the Drive document id is the text after '='.\n",
 "models = {\n",
 "    'None-ResNet-None-CTC.pth': 'https://drive.google.com/open?id=1FocnxQzFBIjDT2F9BkNUiLdo1cC3eaO0',\n",
 "    'None-VGG-BiLSTM-CTC.pth': 'https://drive.google.com/open?id=1GGC2IRYEMQviZhqQpbtpeTgHO_IXWetG',\n",
 "    'None-VGG-None-CTC.pth': 'https://drive.google.com/open?id=1FS3aZevvLiGF1PFBm5SkwvVcgI6hJWL9',\n",
 "    'TPS-ResNet-BiLSTM-Attn-case-sensitive.pth': 'https://drive.google.com/open?id=1ajONZOgiG9pEYsQ-eBmgkVbMDuHgPCaY',\n",
 "    'TPS-ResNet-BiLSTM-Attn.pth': 'https://drive.google.com/open?id=1b59rXuGGmKne1AuHnkgDzoYgKeETNMv9',\n",
 "    # NOTE(review): this id is identical to the one used for None-ResNet-None-CTC.pth above, and the\n",
 "    # recorded listing shows the same size for both files, so this is probably a copy-paste error.\n",
 "    # TODO: confirm the correct Drive id for TPS-ResNet-BiLSTM-CTC.pth.\n",
 "    'TPS-ResNet-BiLSTM-CTC.pth': 'https://drive.google.com/open?id=1FocnxQzFBIjDT2F9BkNUiLdo1cC3eaO0',\n",
 "}\n",
 "\n",
 "# Two-step download: the first request saves the cookies Drive sets and the returned HTML page;\n",
 "# the second request follows the 'uc-download-link' href found in that page to fetch the file.\n",
 "for k, v in models.items():\n",
 "    doc_id = v[v.find('=')+1:]\n",
 "    !curl -c /tmp/cookies \"https://drive.google.com/uc?export=download&id=$doc_id\" > /tmp/intermezzo.html\n",
 "    # The href is HTML-escaped, so every '&amp;' has to be turned back into '&' before it is used as a URL.\n",
 "    !curl -L -b /tmp/cookies \"https://drive.google.com$(cat /tmp/intermezzo.html | grep -Po 'uc-download-link\" [^>]* href=\"\\K[^\"]*' | sed 's/\\&amp;/\\&/g')\" > $k\n",
 "\n",
 "# NOTE(review): a file of only a few KB in this listing is presumably an HTML page returned by\n",
 "# Drive rather than a checkpoint - verify the sizes before using a model.\n",
 "!ls -al *.pth"
 ], "execution_count": 17, "outputs": [ { "output_type": "stream", "text": [
 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
 " Dload Upload Total Spent Left Speed\n",
 "100 3263 0 3263 0 0 13264 0 --:--:-- --:--:-- --:--:-- 13210\n",
 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
 " Dload Upload Total Spent Left Speed\n",
 "100 388 0 388 0 0 1644 0 --:--:-- --:--:-- --:--:-- 1644\n",
 "100 186M 0 186M 0 0 79.9M 0 --:--:-- 0:00:02 --:--:-- 104M\n",
 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
 " Dload Upload Total Spent Left Speed\n",
 "100 388 0 388 0 0 203 0 --:--:-- 0:00:01 --:--:-- 203\n",
 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
 " Dload Upload Total Spent Left Speed\n",
 " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n",
 "100 64576 0 64576 0 0 211k 0 --:--:-- --:--:-- --:--:-- 211k\n",
 " % Total % Received % Xferd Average Speed Time Time Time Current\n",
 " Dload 
Upload Total Spent Left Speed\n", "100 388 0 388 0 0 180 0 --:--:-- 0:00:02 --:--:-- 180\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", "100 64696 0 64696 0 0 218k 0 --:--:-- --:--:-- --:--:-- 218k\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 3279 0 3279 0 0 11153 0 --:--:-- --:--:-- --:--:-- 11153\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 388 0 388 0 0 1216 0 --:--:-- --:--:-- --:--:-- 1212\n", "100 189M 0 189M 0 0 84.3M 0 --:--:-- 0:00:02 --:--:-- 111M\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 3255 0 3255 0 0 11302 0 --:--:-- --:--:-- --:--:-- 11302\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 388 0 388 0 0 1464 0 --:--:-- --:--:-- --:--:-- 1464\n", "100 189M 0 189M 0 0 72.5M 0 --:--:-- 0:00:02 --:--:-- 88.8M\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 3263 0 3263 0 0 13052 0 --:--:-- --:--:-- --:--:-- 13052\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 388 0 388 0 0 1644 0 --:--:-- --:--:-- --:--:-- 1644\n", "100 186M 0 186M 0 0 124M 0 --:--:-- 0:00:01 --:--:-- 176M\n", "-rw-r--r-- 1 root root 195888589 Oct 27 14:16 None-ResNet-None-CTC.pth\n", "-rw-r--r-- 1 root root 64576 Oct 27 14:17 None-VGG-BiLSTM-CTC.pth\n", "-rw-r--r-- 1 root root 64696 Oct 27 14:17 None-VGG-None-CTC.pth\n", "-rw-r--r-- 1 root root 198975977 Oct 27 14:17 TPS-ResNet-BiLSTM-Attn-case-sensitive.pth\n", "-rw-r--r-- 1 root root 198678680 Oct 27 14:17 TPS-ResNet-BiLSTM-Attn.pth\n", "-rw-r--r-- 1 root root 195888589 Oct 27 
14:17 TPS-ResNet-BiLSTM-CTC.pth\n" ], "name": "stdout" } ] },
{ "cell_type": "code", "metadata": { "id": "s-E716WnhfrI", "colab_type": "code", "colab": {} }, "source": [
 "output = !CUDA_VISIBLE_DEVICES=0 python3 demo.py \\\n",
 "--Transformation TPS --FeatureExtraction ResNet --SequenceModeling BiLSTM --Prediction Attn \\\n",
 "--image_folder demo_image/ \\\n",
 "--saved_model TPS-ResNet-BiLSTM-Attn.pth"
 ], "execution_count": 0, "outputs": [] },
{ "cell_type": "code", "metadata": { "id": "bJRPln2QlxlJ", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "a81e0b6b-8171-49a6-c59f-15649d7c6bb8" }, "source": [
 "from IPython.display import display, HTML\n",
 "from PIL import Image\n",
 "import base64\n",
 "import io\n",
 "import pandas as pd\n",
 "\n",
 "# `output` holds the lines printed by demo.py. Find the table header by its column names instead of\n",
 "# an exact string match, so the amount of padding between the columns does not matter.\n",
 "expected_header = ['image_path', 'predicted_labels', 'confidence score']\n",
 "header_idx = next(\n",
 "    (i for i, line in enumerate(output) if [f.strip() for f in line.split('\\t')] == expected_header),\n",
 "    None,\n",
 ")\n",
 "if header_idx is None:\n",
 "    # demo.py's output is captured silently, so surface it when no result table was produced.\n",
 "    raise RuntimeError('demo.py did not print a result table:\\n' + '\\n'.join(output))\n",
 "\n",
 "records = []\n",
 "# Skip the header and the line right after it (presumably a separator - TODO confirm against demo.py).\n",
 "for line in output[header_idx + 2:]:\n",
 "    fields = line.split('\\t')\n",
 "    if len(fields) < 3:\n",
 "        continue  # not a prediction row (e.g. a blank line or a log message)\n",
 "    filename, label, conf = (f.strip() for f in fields[:3])\n",
 "    # Re-encode the image as PNG and embed it as a base64 data URI so it renders inside the table.\n",
 "    with Image.open(filename) as img:\n",
 "        img_buffer = io.BytesIO()\n",
 "        img.save(img_buffer, format=\"PNG\")\n",
 "    img_str = base64.b64encode(img_buffer.getvalue()).decode(\"utf-8\")\n",
 "    records.append({\n",
 "        'img': '<img src=\"data:image/png;base64,{}\">'.format(img_str),\n",
 "        'id': filename,\n",
 "        'label': label,\n",
 "        'conf': conf,\n",
 "    })\n",
 "\n",
 "# Build the frame once from the collected rows instead of growing it cell by cell.\n",
 "data = pd.DataFrame(records, columns=['img', 'id', 'label', 'conf'])\n",
 "# escape=False keeps the <img> tags as markup instead of rendering them as literal text.\n",
 "html_all = data.to_html(escape=False)\n",
 "display(HTML(html_all))"
 ], "execution_count": 54, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "
\n", " | img | \n", "id | \n", "label | \n", "conf | \n", "
---|---|---|---|---|
0 | \n", "\n", " | demo_image/demo_1.png | \n", "available | \n", "0.9999 | \n", "
1 | \n", "\n", " | demo_image/demo_2.jpg | \n", "shakeshack | \n", "0.9263 | \n", "
2 | \n", "\n", " | demo_image/demo_3.png | \n", "london | \n", "0.9878 | \n", "
3 | \n", "\n", " | demo_image/demo_4.png | \n", "greenstead | \n", "0.9984 | \n", "
4 | \n", "\n", " | demo_image/demo_5.png | \n", "toast | \n", "0.9963 | \n", "
5 | \n", "\n", " | demo_image/demo_6.png | \n", "merry | \n", "0.9977 | \n", "
6 | \n", "\n", " | demo_image/demo_7.png | \n", "underground | \n", "1.0000 | \n", "
7 | \n", "\n", " | demo_image/demo_8.jpg | \n", "ronaldo | \n", "0.8412 | \n", "
8 | \n", "\n", " | demo_image/demo_9.jpg | \n", "bally | \n", "0.7814 | \n", "
9 | \n", "\n", " | demo_image/demo_10.jpg | \n", "university | \n", "0.9998 | \n", "