From 9e5ae484c5abc5d5399fce888cd260f5242a52cc Mon Sep 17 00:00:00 2001 From: Taras Date: Sun, 27 Oct 2019 10:43:59 -0400 Subject: [PATCH] Sample ipython notebook demonstrating use of the code --- demo.ipynb | 321 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 demo.ipynb diff --git a/demo.ipynb b/demo.ipynb new file mode 100644 index 0000000..5f5d243 --- /dev/null +++ b/demo.ipynb @@ -0,0 +1,321 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "deep-text-recognition-benchmark.ipynb", + "provenance": [], + "collapsed_sections": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dHbHK60Tqn2c", + "colab_type": "text" + }, + "source": [ + "This can be executed in https://colab.research.google.com \"Python 3 / GPU\" runtime." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Pi-PA14AhdK-", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "outputId": "5d620547-6ce9-42b9-8206-20f88f23e567" + }, + "source": [ + "!git clone https://github.com/clovaai/deep-text-recognition-benchmark\n", + "%cd deep-text-recognition-benchmark" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'deep-text-recognition-benchmark'...\n", + "remote: Enumerating objects: 40, done.\u001b[K\n", + "remote: Counting objects: 100% (40/40), done.\u001b[K\n", + "remote: Compressing objects: 100% (35/35), done.\u001b[K\n", + "remote: Total 376 (delta 20), reused 13 (delta 5), pack-reused 336\u001b[K\n", + "Receiving objects: 100% (376/376), 2.43 MiB | 2.61 MiB/s, done.\n", + "Resolving deltas: 100% (222/222), done.\n", + "/content/deep-text-recognition-benchmark\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jX-ucvimlaFZ", + "colab_type": "text" + }, + "source": [ + "Next, download the large model files from Google Drive using the workaround described in this Stack Overflow answer: https://stackoverflow.com/questions/20665881/direct-download-from-google-drive-using-google-drive-api/32742700#32742700" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9eEhhPBshkjr", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 833 + }, + "outputId": "205d0bd8-340f-4e9b-ebfd-e9c8f7d327a0" + }, + "source": [ + "models = {\n", + " 'None-ResNet-None-CTC.pth': 'https://drive.google.com/open?id=1FocnxQzFBIjDT2F9BkNUiLdo1cC3eaO0',\n", + " 'None-VGG-BiLSTM-CTC.pth': 'https://drive.google.com/open?id=1GGC2IRYEMQviZhqQpbtpeTgHO_IXWetG',\n", + " 'None-VGG-None-CTC.pth': 'https://drive.google.com/open?id=1FS3aZevvLiGF1PFBm5SkwvVcgI6hJWL9',\n", + " 'TPS-ResNet-BiLSTM-Attn-case-sensitive.pth': 'https://drive.google.com/open?id=1ajONZOgiG9pEYsQ-eBmgkVbMDuHgPCaY',\n", + " 
'TPS-ResNet-BiLSTM-Attn.pth': 'https://drive.google.com/open?id=1b59rXuGGmKne1AuHnkgDzoYgKeETNMv9',\n", + " 'TPS-ResNet-BiLSTM-CTC.pth': 'https://drive.google.com/open?id=1FocnxQzFBIjDT2F9BkNUiLdo1cC3eaO0',\n", + "}\n", + "\n", + "for k, v in models.items():\n", + " doc_id = v[v.find('=')+1:]\n", + " !curl -c /tmp/cookies \"https://drive.google.com/uc?export=download&id=$doc_id\" > /tmp/intermezzo.html\n", + " !curl -L -b /tmp/cookies \"https://drive.google.com$(cat /tmp/intermezzo.html | grep -Po 'uc-download-link\" [^>]* href=\"\\K[^\"]*' | sed 's/\\&amp;/\\&/g')\" > $k\n", + "\n", + "!ls -al *.pth" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 3263 0 3263 0 0 13264 0 --:--:-- --:--:-- --:--:-- 13210\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 388 0 388 0 0 1644 0 --:--:-- --:--:-- --:--:-- 1644\n", + "100 186M 0 186M 0 0 79.9M 0 --:--:-- 0:00:02 --:--:-- 104M\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 388 0 388 0 0 203 0 --:--:-- 0:00:01 --:--:-- 203\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", + "100 64576 0 64576 0 0 211k 0 --:--:-- --:--:-- --:--:-- 211k\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 388 0 388 0 0 180 0 --:--:-- 0:00:02 --:--:-- 180\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\n", + "100 64696 0 64696 0 0 218k 0 --:--:-- --:--:-- --:--:-- 218k\n", + " % Total % Received % Xferd Average Speed 
Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 3279 0 3279 0 0 11153 0 --:--:-- --:--:-- --:--:-- 11153\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 388 0 388 0 0 1216 0 --:--:-- --:--:-- --:--:-- 1212\n", + "100 189M 0 189M 0 0 84.3M 0 --:--:-- 0:00:02 --:--:-- 111M\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 3255 0 3255 0 0 11302 0 --:--:-- --:--:-- --:--:-- 11302\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 388 0 388 0 0 1464 0 --:--:-- --:--:-- --:--:-- 1464\n", + "100 189M 0 189M 0 0 72.5M 0 --:--:-- 0:00:02 --:--:-- 88.8M\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 3263 0 3263 0 0 13052 0 --:--:-- --:--:-- --:--:-- 13052\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 388 0 388 0 0 1644 0 --:--:-- --:--:-- --:--:-- 1644\n", + "100 186M 0 186M 0 0 124M 0 --:--:-- 0:00:01 --:--:-- 176M\n", + "-rw-r--r-- 1 root root 195888589 Oct 27 14:16 None-ResNet-None-CTC.pth\n", + "-rw-r--r-- 1 root root 64576 Oct 27 14:17 None-VGG-BiLSTM-CTC.pth\n", + "-rw-r--r-- 1 root root 64696 Oct 27 14:17 None-VGG-None-CTC.pth\n", + "-rw-r--r-- 1 root root 198975977 Oct 27 14:17 TPS-ResNet-BiLSTM-Attn-case-sensitive.pth\n", + "-rw-r--r-- 1 root root 198678680 Oct 27 14:17 TPS-ResNet-BiLSTM-Attn.pth\n", + "-rw-r--r-- 1 root root 195888589 Oct 27 14:17 TPS-ResNet-BiLSTM-CTC.pth\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "s-E716WnhfrI", + "colab_type": "code", + "colab": {} + }, + "source": [ + "output = !CUDA_VISIBLE_DEVICES=0 python3 demo.py \\\n", + "--Transformation TPS --FeatureExtraction ResNet 
--SequenceModeling BiLSTM --Prediction Attn \\\n", + "--image_folder demo_image/ \\\n", + "--saved_model TPS-ResNet-BiLSTM-Attn.pth" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bJRPln2QlxlJ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "a81e0b6b-8171-49a6-c59f-15649d7c6bb8" + }, + "source": [ + "from IPython.core.display import display, HTML\n", + "from PIL import Image\n", + "import base64\n", + "import io\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame()\n", + "for ind, row in enumerate(output[output.index('image_path \\tpredicted_labels \\tconfidence score')+2:]):\n", + " row = row.split('\\t')\n", + " filename = row[0].strip()\n", + " label = row[1].strip()\n", + " conf = row[2].strip()\n", + " img = Image.open(filename)\n", + " img_buffer = io.BytesIO()\n", + " img.save(img_buffer, format=\"PNG\")\n", + " imgStr = base64.b64encode(img_buffer.getvalue()).decode(\"utf-8\") \n", + "\n", + " data.loc[ind, 'img'] = ''.format(imgStr)\n", + " data.loc[ind, 'id'] = filename\n", + " data.loc[ind, 'label'] = label\n", + " data.loc[ind, 'conf'] = conf\n", + "\n", + "html_all = data.to_html(escape=False)\n", + "display(HTML(html_all))" + ], + "execution_count": 54, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imgidlabelconf
0demo_image/demo_1.pngavailable0.9999
1demo_image/demo_2.jpgshakeshack0.9263
2demo_image/demo_3.pnglondon0.9878
3demo_image/demo_4.pnggreenstead0.9984
4demo_image/demo_5.pngtoast0.9963
5demo_image/demo_6.pngmerry0.9977
6demo_image/demo_7.pngunderground1.0000
7demo_image/demo_8.jpgronaldo0.8412
8demo_image/demo_9.jpgbally0.7814
9demo_image/demo_10.jpguniversity0.9998
" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + } + ] +} \ No newline at end of file