From a4713b591f78b91dc1481acbffd83e8df3dfd647 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Arrufat?=
 <1671644+arrufat@users.noreply.github.com>
Date: Mon, 5 Apr 2021 02:27:32 +0900
Subject: [PATCH] Add letterbox image (#2335)

* Add letterbox image

* use && instead of and

* make function adhere to the generic image interface

* avoid extra copy

* add some overloads and a simple test

* add documentation

* use zero_border_pixels and remove superfluous temporary image

* allow different input and out images and update docs

* remove empty line

* be more explicit about output image size
---
 dlib/image_transforms/interpolation.h         | 64 +++++++++++++
 .../image_transforms/interpolation_abstract.h | 89 +++++++++++++++++++
 dlib/test/image.cpp                           | 24 +++++
 3 files changed, 177 insertions(+)
diff --git a/dlib/image_transforms/interpolation.h b/dlib/image_transforms/interpolation.h
index 2eedb60e4..bcfff2e34 100644
--- a/dlib/image_transforms/interpolation.h
+++ b/dlib/image_transforms/interpolation.h
@@ -965,6 +965,70 @@ namespace dlib
         swap(img, temp);
     }
 
+// ----------------------------------------------------------------------------------------
+
+    // Resizes img_in to fit a size x size img_out, zero padding the shorter side (spec: interpolation_abstract.h).
+    template <
+        typename image_type1,
+        typename image_type2,
+        typename interpolation_type
+        >
+    point_transform_affine letterbox_image (
+        const image_type1& img_in,
+        image_type2& img_out,
+        long size,
+        const interpolation_type& interp
+    )
+    {
+        DLIB_CASSERT(size > 0, "size must be bigger than zero, but was " << size);
+        const_image_view<image_type1> vimg_in(img_in);
+        image_view<image_type2> vimg_out(img_out);
+        DLIB_CASSERT(vimg_in.nr() > 0 && vimg_in.nc() > 0, "img_in must not be empty");
+
+        const auto scale = size / std::max<double>(vimg_in.nr(), vimg_in.nc());
+        // early return if the image has already the requested size and no padding is needed
+        if (scale == 1 && vimg_in.nr() == vimg_in.nc())
+        {
+            assign_image(vimg_out, vimg_in);
+            return point_transform_affine();
+        }
+
+        vimg_out.set_size(size, size);
+        const long nr = std::lround(scale * vimg_in.nr());
+        const long nc = std::lround(scale * vimg_in.nc());
+        dpoint offset((size - nc) / 2.0, (size - nr) / 2.0);
+        const auto r = rectangle(offset.x(), offset.y(), offset.x() + nc - 1, offset.y() + nr - 1);
+        zero_border_pixels(vimg_out, r);
+        auto si = sub_image(img_out, r);
+        resize_image(vimg_in, si, interp);
+        return point_transform_affine(identity_matrix<double>(2) * scale, offset);
+    }
+
+    // Same as the four argument letterbox_image(), but always interpolates bilinearly.
+    template <typename image_type1, typename image_type2>
+    point_transform_affine letterbox_image (
+        const image_type1& img_in,
+        image_type2& img_out,
+        long size
+    )
+    {
+        // hand a default constructed bilinear interpolator to the general overload
+        const interpolate_bilinear bilinear_interp{};
+        return letterbox_image(img_in, img_out, size, bilinear_interp);
+    }
+
+    // Pads img_in to a square whose side is its longest dimension, using bilinear interpolation.
+    template <typename image_type1, typename image_type2>
+    point_transform_affine letterbox_image (
+        const image_type1& img_in,
+        image_type2& img_out
+    )
+    {
+        // the target side equals the longest input side, so the general overload scales by 1
+        const auto longest_side = std::max(num_rows(img_in), num_columns(img_in));
+        return letterbox_image(img_in, img_out, longest_side, interpolate_bilinear());
+    }
+
 // ----------------------------------------------------------------------------------------
 
     template <
diff --git a/dlib/image_transforms/interpolation_abstract.h b/dlib/image_transforms/interpolation_abstract.h
index 626988837..eea310e98 100644
--- a/dlib/image_transforms/interpolation_abstract.h
+++ b/dlib/image_transforms/interpolation_abstract.h
@@ -436,6 +436,95 @@ namespace dlib
             - Returns immediately, if size_scale == 1.0
     !*/
 
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type1,
+        typename image_type2,
+        typename interpolation_type
+        >
+    point_transform_affine letterbox_image (
+        const image_type1& img_in,
+        image_type2& img_out,
+        long size,
+        const interpolation_type& interp
+    );
+    /*!
+        requires
+            - image_type1 == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - image_type2 == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
+              interpolate_quadratic, or a type with a compatible interface.
+            - size > 0
+            - num_rows(img_in) > 0 && num_columns(img_in) > 0
+            - is_same_object(img_in, img_out) == false
+        ensures
+            - Scales img_in so that it fits into a size * size square, i.e. we will have:
+                - #img_out.nr() == size
+                - #img_out.nc() == size
+            - Preserves the aspect ratio of img_in by 0-padding the shortest side.
+            - Uses the supplied interpolation routine interp to perform the necessary
+              pixel interpolation.
+            - Returns a transformation object that maps points in img_in into their
+              corresponding location in #img_out.
+    !*/
+
+    template <
+        typename image_type1,
+        typename image_type2
+        >
+    point_transform_affine letterbox_image (
+        const image_type1& img_in,
+        image_type2& img_out,
+        long size
+    );
+    /*!
+        requires
+            - image_type1 == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - image_type2 == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - size > 0
+            - num_rows(img_in) > 0 && num_columns(img_in) > 0
+            - is_same_object(img_in, img_out) == false
+        ensures
+            - Scales img_in so that it fits into a size * size square, i.e. we will have:
+                - #img_out.nr() == size
+                - #img_out.nc() == size
+            - Preserves the aspect ratio of img_in by 0-padding the shortest side.
+            - Uses bilinear interpolation to perform the necessary pixel
+              interpolation.
+            - Returns a transformation object that maps points in img_in into their
+              corresponding location in #img_out.
+    !*/
+
+    template <
+        typename image_type1,
+        typename image_type2
+        >
+    point_transform_affine letterbox_image (
+        const image_type1& img_in,
+        image_type2& img_out
+    );
+    /*!
+        requires
+            - image_type1 == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - image_type2 == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - num_rows(img_in) > 0 && num_columns(img_in) > 0
+            - is_same_object(img_in, img_out) == false
+        ensures
+            - 0-pads img_in into a square of side max(num_rows(img_in), num_columns(img_in))
+              and stores the result into #img_out, i.e. we will have:
+                - #img_out.nr() == max(num_rows(img_in), num_columns(img_in))
+                - #img_out.nc() == max(num_rows(img_in), num_columns(img_in))
+            - Returns a transformation object that maps points in img_in into their
+              corresponding location in #img_out.
+    !*/
+
 // ----------------------------------------------------------------------------------------
 
     template <
diff --git a/dlib/test/image.cpp b/dlib/test/image.cpp
index 08d27be9d..5659b0388 100644
--- a/dlib/test/image.cpp
+++ b/dlib/test/image.cpp
@@ -2257,6 +2257,29 @@ namespace
         }
     }
 
+    void test_letterbox_image()
+    {
+        print_spinner();
+        const rgb_pixel black(0, 0, 0);
+        const rgb_pixel white(255, 255, 255);
+        matrix<rgb_pixel> img_s(40, 60);
+        matrix<rgb_pixel> img_d;
+        assign_all_pixels(img_s, white);
+        // 40x60 letterboxed into 30x30: scale is 30/60 = 0.5, giving 20x30 shifted down by 5 rows
+        const auto tform = letterbox_image(img_s, img_d, 30, interpolate_nearest_neighbor());
+        DLIB_TEST(tform.get_m() == identity_matrix<double>(2) * 0.5);
+        DLIB_TEST(tform.get_b() == dpoint(0, 5));
+        // manually generate the target image: a white 20x30 band on a black 30x30 canvas
+        matrix<rgb_pixel> img_t(30, 30);
+        assign_all_pixels(img_t, black);
+        matrix<rgb_pixel> img_w(20, 30);
+        assign_all_pixels(img_w, white);
+        const rectangle r (0, 5, 30 - 1, 25 - 1);
+        auto si = sub_image(img_t, r);
+        assign_image(si, img_w);
+        DLIB_TEST(img_d == img_t);
+    }
+
     void test_draw_string()
     {
         print_spinner();
@@ -2386,6 +2409,7 @@ namespace
             test_null_rotate_image_with_interpolation();
             test_null_rotate_image_with_interpolation_quadratic();
             test_interpolate_bilinear();
+            test_letterbox_image();
             test_draw_string();
         }
     } a;