Add input_rgb_image_pair layer and new net visitors documentation (#2497)

* Add input_rgb_image layer documentation * fix typo * Add index entries for missing network visitors * update Barlow Twins loss links * add missing yolo rect docs, links to yolo paper and better Barlow Twins formatting
2022-02-02 22:36:48 +09:00 · 2022-02-02 22:36:48 +09:00 · 3d4c14e633
parent 21651f498e
commit 3d4c14e633
3 changed files with 77 additions and 7 deletions
--- a/docs/docs/imaging.xml
+++ b/docs/docs/imaging.xml
@ -98,6 +98,7 @@
         <item>evaluate_detectors</item>
         <item>full_object_detection</item>
         <item>mmod_rect</item>
+         <item>yolo_rect</item>
         <item>scan_image</item>
         <item>scan_image_movable_parts</item>
         <item>find_points_above_thresh</item>
@ -2551,6 +2552,19 @@
                                 
      </component>
            
+   <!-- ************************************************************************* -->
+
+      <component>
+         <name>yolo_rect</name>
+         <file>dlib/image_processing.h</file>
+         <spec_file link="true">dlib/image_processing/full_object_detection_abstract.h</spec_file>
+         <description>
+                This is a simple struct that is used to give training data to, and receive detections
+                from, the <a href="ml.html#loss_yolo_">YOLO loss layer for object detection</a>.
+         </description>
+
+      </component>
+
   <!-- ************************************************************************* -->

      <component>
--- a/docs/docs/ml.xml
+++ b/docs/docs/ml.xml
@ -126,6 +126,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
               <item>input_rgb_image</item>
               <item>input_rgb_image_sized</item>
               <item>input_rgb_image_pyramid</item>
+               <item>input_rgb_image_pair</item>
               <item>input_grayscale_image_pyramid</item>
               <item>
                  <name>EXAMPLE_INPUT_LAYER</name>
@ -355,7 +356,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
               </item>
               <item>
                  <name>loss_barlow_twins</name>
-                  <link>dlib/dnn/loss_abstract.h.html#loss_barlow_twins_</link>
+                  <link>#loss_barlow_twins_</link>
               </item>
            </sub>
         </item>
@ -817,6 +818,28 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
         </description>
      </component>

+
+   <!-- ************************************************************************* -->
+
+      <component>
+         <name>input_rgb_image_pair</name>
+         <file>dlib/dnn.h</file>
+         <spec_file link="true">dlib/dnn/input_abstract.h</spec_file>
+         <description>
+            This is a simple input layer type for use in a deep neural network
+            which takes a pair of RGB images as input and loads them into a network.
+            It is useful when you want to input image pairs that are related to each other,
+            for instance, when they are different distorted views of the same original image.
+            This input layer is meant to be used with a loss layer such as the
+            <a href="#loss_barlow_twins_">Barlow Twins loss layer</a>.  You can also
+            convert between <a href="#input_rgb_image">input_rgb_image</a> and
+            this input layer by copy construction or assignment.
+         </description>
+         <examples>
+            <example>dnn_self_supervised_learning_ex.cpp.html</example>
+         </examples>
+      </component>
+
   <!-- ************************************************************************* -->

      <component>
@ -855,13 +878,13 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
         <file>dlib/dnn.h</file>
         <spec_file link="true">dlib/dnn/loss_abstract.h</spec_file>
         <description>
-            This object is a <a href="dlib/dnn/loss_abstract.h.html#EXAMPLE_LOSS_LAYER_">loss layer</a> 
-            for a deep neural network.  In particular, it implements the YOLO detection loss defined in 
+            This object is a <a href="dlib/dnn/loss_abstract.h.html#EXAMPLE_LOSS_LAYER_">loss layer</a>
+            for a deep neural network.  In particular, it implements the YOLO detection loss defined in
            the paper:
-            <blockquote>YOLOv3: An Incremental Improvement by Joseph Redmon and Ali Farhadi.</blockquote>
-         
+            <blockquote><a href="https://arxiv.org/abs/1804.02767">YOLOv3: An Incremental Improvement</a> by Joseph Redmon and Ali Farhadi.</blockquote>
+
            This means you use this loss if you want to detect the locations of objects
-            in images. 
+            in images.
         </description>
         <examples>
            <example>dnn_yolo_train_ex.cpp.html</example>
@ -916,6 +939,32 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
         </description>
      </component>

+   <!-- ************************************************************************* -->
+
+      <component>
+         <name>loss_barlow_twins_</name>
+         <file>dlib/dnn.h</file>
+         <spec_file link="true">dlib/dnn/loss_abstract.h</spec_file>
+         <description>
+            This object is a <a href="dlib/dnn/loss_abstract.h.html#EXAMPLE_LOSS_LAYER_">loss layer</a>
+            for a deep neural network.  In particular, it implements the Barlow Twins loss layer presented
+            in the paper:
+
+            <blockquote>
+            <a href="https://arxiv.org/abs/2103.03230">Barlow Twins: Self-Supervised Learning
+            via Redundancy Reduction</a>, by Jure Zbontar, Li Jing, Ishan Misra, Yann LeCun, St&#233;phane Deny.
+            </blockquote>
+
+            This means you use this loss to learn useful representations from data that has no label
+            information.  Useful representations are ones that can be used to train a model on a downstream task,
+            such as classification.  In particular, this loss function applies the redundancy reduction
+            principle to the representations learned by the network it sits on top of.
+         </description>
+         <examples>
+            <example>dnn_self_supervised_learning_ex.cpp.html</example>
+         </examples>
+      </component>
+
   <!-- ************************************************************************* -->

      <component>
--- a/docs/docs/term_index.xml
+++ b/docs/docs/term_index.xml
@ -39,6 +39,9 @@
         <term file="dlib/dnn/utilities_abstract.h.html" name="randomize_parameters" include="dlib/dnn.h"/>
         <term file="dlib/dnn/utilities_abstract.h.html" name="input_tensor_to_output_tensor" include="dlib/dnn.h"/>
         <term file="dlib/dnn/utilities_abstract.h.html" name="output_tensor_to_input_tensor" include="dlib/dnn.h"/>
+         <term file="dlib/dnn/utilities_abstract.h.html" name="count_parameters" include="dlib/dnn.h"/>
+         <term file="dlib/dnn/utilities_abstract.h.html" name="set_all_learning_rate_multipliers" include="dlib/dnn.h"/>
+         <term file="dlib/dnn/utilities_abstract.h.html" name="set_learning_rate_multipliers_range" include="dlib/dnn.h"/>
         <term file="dlib/dnn/core_abstract.h.html" name="tuple_head" include="dlib/dnn.h"/>
         <term file="dlib/dnn/core_abstract.h.html" name="tuple_tail" include="dlib/dnn.h"/>
         <term file="dlib/dnn/core_abstract.h.html" name="get_learning_rate_multiplier" include="dlib/dnn.h"/>
@ -101,6 +104,8 @@
         <term file="dlib/dnn/layers_abstract.h.html" name="CONV_MODE" include="dlib/dnn.h"/>
         <term file="dlib/dnn/layers_abstract.h.html" name="FC_MODE" include="dlib/dnn.h"/>
         <term file="dlib/dnn/layers_abstract.h.html" name="set_all_bn_running_stats_window_sizes" include="dlib/dnn.h"/>
+         <term file="dlib/dnn/layers_abstract.h.html" name="disable_duplicative_biases" include="dlib/dnn.h"/>
+         <term file="dlib/dnn/layers_abstract.h.html" name="fuse_layers" include="dlib/dnn.h"/>
         <term file="dlib/cuda/tensor_abstract.h.html" name="tensor" include="dlib/cuda/tensor.h"/>
         <term file="dlib/cuda/tensor_abstract.h.html" name="resizable_tensor" include="dlib/cuda/tensor.h"/>
         <term file="dlib/cuda/tensor_abstract.h.html" name="alias_tensor_instance" include="dlib/cuda/tensor.h"/>
@ -123,6 +128,7 @@
         <term file="ml.html" name="input_rgb_image" include="dlib/dnn.h"/>
         <term file="ml.html" name="input_rgb_image_sized" include="dlib/dnn.h"/>
         <term file="ml.html" name="input_rgb_image_pyramid" include="dlib/dnn.h"/>
+         <term file="ml.html" name="input_rgb_image_pair" include="dlib/dnn.h"/>
         <term file="ml.html" name="input_grayscale_image_pyramid" include="dlib/dnn.h"/>

         <term file="ml.html" name="dnn_trainer" include="dlib/dnn.h"/>
@ -149,7 +155,7 @@
         <term file="dlib/dnn/loss_abstract.h.html" name="mmod_options" include="dlib/dnn.h"/>
         <term file="ml.html" name="loss_yolo_" include="dlib/dnn.h"/>
         <term file="dlib/dnn/loss_abstract.h.html" name="yolo_options" include="dlib/dnn.h"/>
-         <term file="dlib/dnn/loss_abstract.h.html" name="loss_barlow_twins_" include="dlib/dnn.h"/>
+         <term file="ml.html" name="loss_barlow_twins_" include="dlib/dnn.h"/>

         <term file="dlib/dnn/solvers_abstract.h.html" name="EXAMPLE_SOLVER" include="dlib/dnn.h"/>
         <term file="dlib/dnn/solvers_abstract.h.html" name="sgd" include="dlib/dnn.h"/>
@ -1649,6 +1655,7 @@
         <term file="dlib/image_processing/object_detector_abstract.h.html" name="full_detection"        include="dlib/image_processing.h"/>
         <term file="imaging.html" name="full_object_detection"   include="dlib/image_processing.h"/>
         <term file="imaging.html" name="mmod_rect"   include="dlib/image_processing.h"/>
+         <term file="imaging.html" name="yolo_rect"   include="dlib/image_processing.h"/>
         <term file="dlib/image_processing/full_object_detection_abstract.h.html" name="all_parts_in_rect" include="dlib/image_processing.h"/>

         <term file="imaging.html" name="scan_image_movable_parts" include="dlib/image_processing.h"/>