mirror of https://github.com/davisking/dlib.git
Fleshed out example program.
This commit is contained in:
parent
cd25356e26
commit
45da41c55f
|
@ -2,75 +2,104 @@
|
||||||
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# You need to compile the dlib python interface before you can use this
|
# This example program shows how to use the dlib sequence segmentation tools from within a
|
||||||
# file. To do this, run compile_dlib_python_module.bat. You also need to
|
# python program. In particular, we will create a simple training dataset, learn a
|
||||||
# have the boost-python library installed. On Ubuntu, this can be done easily by running
|
# sequence segmentation model, and then test it on some sequences.
|
||||||
# the command: sudo apt-get install libboost-python-dev
|
#
|
||||||
|
# COMPILING THE DLIB PYTHON INTERFACE
|
||||||
|
# You need to compile the dlib python interface before you can use this file. To do
|
||||||
|
# this, run compile_dlib_python_module.bat. This should work on any operating system so
|
||||||
|
# long as you have CMake and boost-python installed. On Ubuntu, both can be installed easily
|
||||||
|
# by running the command: sudo apt-get install libboost-python-dev cmake
|
||||||
|
|
||||||
|
|
||||||
# asfd
|
|
||||||
import dlib
|
import dlib
|
||||||
|
|
||||||
|
|
||||||
|
# In a sequence segmentation task we are given a sequence of objects (e.g. words in a
|
||||||
|
# sentence) and we are supposed to detect certain subsequences (e.g. named entities). In
|
||||||
|
# the code below we create some very simple sequence/segmentation training pairs. In
|
||||||
|
# particular, each element of a sequence is represented by a vector which describes
|
||||||
|
# important properties of the element. The idea is to use vectors that contain information
|
||||||
|
# useful for detecting whatever kind of subsequences you are interested in detecting.
|
||||||
|
|
||||||
|
# To keep this example simple we will use very simple vectors. Specifically, each vector
|
||||||
|
# is 2D and is either the vector [0 1] or [1 0]. Moreover, we will say that the
|
||||||
|
# subsequences we want to detect are any runs of the [0 1] vector. Note that the code
|
||||||
|
# works with both dense and sparse vectors. The following if statement constructs either
|
||||||
|
# kind depending on the value in use_sparse_vects.
|
||||||
use_sparse_vects = False
|
use_sparse_vects = False
|
||||||
|
|
||||||
if use_sparse_vects:
|
|
||||||
samples = dlib.sparse_vectorss()
|
|
||||||
else:
|
|
||||||
samples = dlib.vectorss()
|
|
||||||
|
|
||||||
segments = dlib.rangess()
|
|
||||||
|
|
||||||
if use_sparse_vects:
|
if use_sparse_vects:
|
||||||
|
training_sequences = dlib.sparse_vectorss()
|
||||||
inside = dlib.sparse_vector()
|
inside = dlib.sparse_vector()
|
||||||
outside = dlib.sparse_vector()
|
outside = dlib.sparse_vector()
|
||||||
|
# Add index/value pairs to each sparse vector. Any index not mentioned in a sparse
|
||||||
|
# vector is implicitly associated with a value of zero.
|
||||||
inside.append(dlib.pair(0,1))
|
inside.append(dlib.pair(0,1))
|
||||||
outside.append(dlib.pair(1,1))
|
outside.append(dlib.pair(1,1))
|
||||||
else:
|
else:
|
||||||
|
training_sequences = dlib.vectorss()
|
||||||
inside = dlib.vector([0, 1])
|
inside = dlib.vector([0, 1])
|
||||||
outside = dlib.vector([1, 0])
|
outside = dlib.vector([1, 0])
|
||||||
|
|
||||||
samples.resize(2)
|
# Here we make our training sequences and their annotated subsegments. We create two
|
||||||
|
# training sequences.
|
||||||
|
segments = dlib.rangess()
|
||||||
|
training_sequences.resize(2)
|
||||||
segments.resize(2)
|
segments.resize(2)
|
||||||
|
|
||||||
samples[0].append(outside)
|
# training_sequences[0] starts out empty and we append vectors onto it. Note that we wish
|
||||||
samples[0].append(outside)
|
# to detect the subsequence of "inside" vectors within the sequence. So the output should
|
||||||
samples[0].append(inside)
|
# be the range (2,5). Note that this is a "half-open" range, meaning that it starts with
|
||||||
samples[0].append(inside)
|
# the element with index 2 and ends just before the element with index 5.
|
||||||
samples[0].append(inside)
|
training_sequences[0].append(outside) # index 0
|
||||||
samples[0].append(outside)
|
training_sequences[0].append(outside) # index 1
|
||||||
samples[0].append(outside)
|
training_sequences[0].append(inside) # index 2
|
||||||
samples[0].append(outside)
|
training_sequences[0].append(inside) # index 3
|
||||||
|
training_sequences[0].append(inside) # index 4
|
||||||
|
training_sequences[0].append(outside) # index 5
|
||||||
|
training_sequences[0].append(outside) # index 6
|
||||||
|
training_sequences[0].append(outside) # index 7
|
||||||
segments[0].append(dlib.range(2,5))
|
segments[0].append(dlib.range(2,5))
|
||||||
|
|
||||||
|
# Add another training sequence
|
||||||
samples[1].append(outside)
|
training_sequences[1].append(outside) # index 0
|
||||||
samples[1].append(outside)
|
training_sequences[1].append(outside) # index 1
|
||||||
samples[1].append(inside)
|
training_sequences[1].append(inside) # index 2
|
||||||
samples[1].append(inside)
|
training_sequences[1].append(inside) # index 3
|
||||||
samples[1].append(inside)
|
training_sequences[1].append(inside) # index 4
|
||||||
samples[1].append(inside)
|
training_sequences[1].append(inside) # index 5
|
||||||
samples[1].append(outside)
|
training_sequences[1].append(outside) # index 6
|
||||||
samples[1].append(outside)
|
training_sequences[1].append(outside) # index 7
|
||||||
segments[1].append(dlib.range(2,6))
|
segments[1].append(dlib.range(2,6))
|
||||||
|
|
||||||
|
|
||||||
|
# Now that we have a simple training set we can train a sequence segmenter. However, the
|
||||||
|
# sequence segmentation trainer has some optional parameters we can set. These parameters
|
||||||
|
# determine properties of the segmentation model we will learn. See the dlib documentation
|
||||||
|
# for the sequence_segmenter object for a full discussion of their meanings.
|
||||||
params = dlib.segmenter_params()
|
params = dlib.segmenter_params()
|
||||||
#params.be_verbose = True
|
|
||||||
params.window_size = 1
|
params.window_size = 1
|
||||||
params.use_high_order_features = False
|
params.use_high_order_features = False
|
||||||
params.C = 1
|
params.use_BIO_model = True
|
||||||
print "params:", params
|
params.C = 1
|
||||||
|
|
||||||
df = dlib.train_sequence_segmenter(samples, segments, params)
|
# Train a model
|
||||||
|
model = dlib.train_sequence_segmenter(training_sequences, segments, params)
|
||||||
|
|
||||||
print len(df.segment_sequence(samples[0]))
|
# A segmenter model takes a sequence of vectors and returns an array of detected ranges.
|
||||||
print df.segment_sequence(samples[0])[0]
|
# So for example, we can give it the first training sequence and it will predict the
|
||||||
|
# locations of the subsequences. This statement will correctly print 2,5.
|
||||||
|
print model.segment_sequence(training_sequences[0])[0]
|
||||||
|
|
||||||
|
# We can also measure the accuracy of a model relative to some labeled data. This
|
||||||
|
# statement prints the precision, recall, and F1-score of the model relative to the data in
|
||||||
|
# training_sequences/segments.
|
||||||
|
print "Test on training data:", dlib.test_sequence_segmenter(model, training_sequences, segments)
|
||||||
|
|
||||||
|
# We can also do n-fold cross-validation and print the resulting precision, recall, and
|
||||||
|
# F1-score.
|
||||||
|
num_folds = 2
|
||||||
|
print "cross validation:", dlib.cross_validate_sequence_segmenter(training_sequences, segments, num_folds, params)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print df.weights
|
|
||||||
|
|
||||||
#res = dlib.test_sequence_segmenter(df, samples, segments)
|
|
||||||
res = dlib.cross_validate_sequence_segmenter(samples, segments, 2, params)
|
|
||||||
|
|
||||||
print res
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue