deep_matching.cpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Jul 17, 07:58

deep_matching.cpp
View Options

	/*
	Copyright (C) 2014 Jerome Revaud

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>
	*/
	#include "deep_matching.h"
	#include "std.h"
	#include "conv.h"
	#include "maxfilter.h"



	// return size of atomic patches
	int get_atomic_patch_size( const dm_params_t* params )
	{
	int upsize = (1 << params->prior_img_downscale);
	return 4*upsize;
	}

	// crop dimensions to a multiple of patch_size
	void get_source_shape( const int width, const int height, const int patch_size, int* res ) {
	// crop the reference image to a multiple of patch size
	res[0] = patch_size * int(width / patch_size);
	res[1] = patch_size * int(height / patch_size);
	}

	// extract pixel descriptor for both images
	void extract_image_desc( image_t* img0, image_t* img1, const dm_params_t* params,
	float_layers desc0, float_layers desc1 )
	{
	// slightly reduce img0 size to fit the patch tiling
	int patch_size = get_atomic_patch_size( params );

	int size[2]; // = {width, height}
	get_source_shape( img0->width, img0->height, patch_size, size );
	image_crop(img0, size[0], size[1]);

	// extract gradient-based information
	*desc0 = extract_desc( img0, &params->desc_params, params->n_thread );
	*desc1 = extract_desc( img1, &params->desc_params, params->n_thread );
	}


	void avgpool2( float_layers* hog, const dm_params_t* params )
	{
	int niter = params->prior_img_downscale;
	while(niter--) {
	float_layers res = empty_layers(float,hog->tx/2,hog->ty/2,hog->tz);
	_avgpool2(hog,&res,params->n_thread);

	// replace hog by res
	free(hog->pixels);
	*hog = res;
	}
	}


	/* compute the grid of parent cell position, and their connection to children cells
	cells can be half-overlapping if <overlap>=1
	<dense_step> forces the grid spacing if >0
	*/
	void prepare_big_cells( const int imshape[2], int cell_size, int overlap, int child_overlap,
	int_cube* child_grid, float_image* child_norms, int dense_step,
	int_cube* grid, int_cube* children, float_image* norms )
	{
	int offset, step, gtx, gty;
	if( dense_step ) {
	step = dense_step;
	offset = 0;
	// we do not care if the patches are overlapping outside the image
	#define grid_size(imsize) (1+imsize/step)
	gtx = grid_size(imshape[0]);
	gty = grid_size(imshape[1]);
	#undef grid_size
	} else {
	// we want patches fully included in the image
	offset = cell_size/2;
	step = cell_size/(overlap+1);
	#define grid_size(imsize) (1+MAX(0,imsize-2*offset)/step)
	gtx = grid_size(imshape[0]);
	gty = grid_size(imshape[1]);
	#undef grid_size
	}

	assert(!grid->pixels);
	*grid = empty_cube(int,gtx,gty,2);

	assert(0<=overlap && overlap<=1);
	int nc = pow2(2+child_overlap); // number of children per cell
	if(child_grid) {
	assert(!norms->pixels);
	*norms = image_like(float,grid);
	assert(!children->pixels);
	*children = empty_cube(int,gtx,gty,nc);
	}

	_prepare_big_cells( cell_size, offset, step, child_grid, child_norms, grid, children, norms );
	}


	void sample_patches( float_layers* hog, int_cube* pos, int patch_size, int f, float norm, int n_thread,
	float_image* patches, float_array* norms )
	{
	assert(norm>0);
	const int npos = pos->tx*pos->ty;
	int_image new_pos = empty_image(int,2,npos);
	for(int i=0; i<2*npos; i++)
	new_pos.pixels[i] = (pos->pixels[i]-patch_size/2)/f;

	patch_size /= f;
	const int nh = get_patch_desc_dim(hog,patch_size);

	assert(!patches->pixels);
	*patches = empty_image(float,nh,npos);
	assert(norms->tx==npos);

	_sample_patches( hog, NULL, &new_pos, patch_size, norm, patches, norms, n_thread );

	free(new_pos.pixels);
	}


	const float trans_inv = 0.9f;

	void convolve_atomic_patches( float_layers* source, float_layers* target,
	const dm_params_t* params, res_scale* first_level )
	{
	const int extend = 1; // slightly spatially extend response maps
	const float norm = 1; // renorm patches

	const int f = first_level->f; // scale factor w.r.t. original image
	const int psize = first_level->patch_size; // current patch size

	// first, sample patches
	float_image patches = {0};
	assert(!first_level->norms.pixels);
	first_level->norms = image_like(float, &first_level->grid);
	float_array norms_arr = {first_level->norms.pixels, (int)IMG_SIZE(&first_level->norms)};
	sample_patches( source, &first_level->grid, psize, f, norm, params->n_thread, &patches, &norms_arr );
	//hash_image(&patches)

	// rectify the norm to a boolean (0 or 1) (useless ?)
	first_level->assign = empty_array(int,norms_arr.tx);
	int n=0, tx = patches.tx;
	for(int i=0; i<norms_arr.tx; i++) {
	norms_arr.pixels[i] = norms_arr.pixels[i]>0;

	// eliminate zero-norm patches
	if( norms_arr.pixels[i] ) {
	if( n < i ) // copy
	memcpy( patches.pixels + ntx, patches.pixels + itx, tx*sizeof(float));
	first_level->assign.pixels[i] = n++;
	} else
	first_level->assign.pixels[i] = -1;

	// convolution is not fully invariant to the image border:
	// blank cells outside the image are a bit disadvantageous
	if( norms_arr.pixels[i] == 0 )
	norms_arr.pixels[i] = 1-trans_inv;
	}
	patches.ty = n; // update new number of valid patches

	//hash_image(&first_level->norms)
	//hash_image(&patches)

	// compute the first level convolutions
	fastconv( &patches, target, psize/f, params->ngh_rad/f, extend, norm, params->n_thread, first_level );

	free(patches.pixels);
	}

	int_image* maxpool3_and_subsample2( float_layers* hog, int true_shape[2], int_image* offsets, float_layers* res, int nt )
	{
	assert(!res->pixels);
	if ( offsets->pixels == NULL )
	assert( hog->tx == true_shape[0] && hog->ty == true_shape[1] );

	// set downsampled size
	true_shape[0] = (true_shape[0]+1)/2;
	true_shape[1] = (true_shape[1]+1)/2;
	assert( true_shape[0]>0 && true_shape[1]>0 );

	if ( offsets->pixels == NULL ) {
	// joint max-pooling and subsampling
	*res = empty_layers(float, true_shape[0], true_shape[1], hog->tz);
	_max_filter_3_and_subsample_layers( hog, res, nt );
	return NULL;

	} else {
	// with offsets
	float_layers maxpooled_hog = layers_like(float, hog);
	_max_filter_3_layers( hog, &maxpooled_hog, nt );
	//CHECK_MAPS(&maxpooled_hog);

	// slightly bigger, so that mininum size always >= 2
	int width = (hog->tx+2)/2;
	int height = (hog->ty+2)/2;
	*res = empty_layers(float, width, height, hog->tz);
	_subsample2_offset( &maxpooled_hog, offsets, res, nt );
	free(maxpooled_hog.pixels);

	// compute new offsets
	int_image* res_offsets = NEW(int_image);
	*res_offsets = image_like(int, offsets);
	for(long i=0; i<IMG_SIZE(offsets); i++)
	res_offsets->pixels[i] = (int)floor( offsets->pixels[i]/2.f );
	return res_offsets;
	}
	}

	#define CHECK_MAPS(rmaps) assert(min_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))>=0 && \
	max_array_f((rmaps)->pixels,LAYERS_SIZE(rmaps))<=1.001)

	/* aggregate response maps of children patches to form response maps of parent patches */
	int sparse_conv( int_cube* children, int_array* children_assign, float_image* child_norms,
	int true_patch_size, float_layers* map, int_image* offsets, int nt,
	res_scale* res )
	{
	float_layers ext_map;
	if( MIN(map->tx,map->ty) < 5 ) {
	ext_map = zeros_layers(float,MAX(5,map->tx),MAX(5,map->ty),map->tz);
	for(int l=0; l<map->tz; l++)
	for(int j=0; j<map->ty; j++)
	for(int i=0; i<map->tx; i++)
	ext_map.pixels[(lext_map.ty + j)ext_map.tx + i] = map->pixels[(lmap->ty + j)map->tx + i];
	map = &ext_map;
	res->true_shape[0] = ext_map.tx;
	res->true_shape[1] = ext_map.ty;
	}

	int_image _children = reshape_z_xy(int, &res->children);

	if( offsets )
	res->offsets = empty_image(int, 2, _children.ty);

	assert(!res->res_map.pixels);
	res->res_map = empty_layers(float, map->tx, map->ty, _children.ty);
	int gap = true_patch_size / 4;
	assert(gap>0);
	float_array _norms = reshape_xy(float, &res->norms);
	float_array _child_norms = reshape_xy(float, child_norms);

	// allocate useless assign
	res->assign = empty_array(int, res->res_map.tz);
	for(int i=0; i<res->assign.tx; i++) res->assign.pixels[i] = i;

	int_array* _assign = NULL;
	int_array* _ch_assign = children_assign->pixels ? children_assign : NULL;
	int n = _sparse_conv( &_children, _ch_assign, gap, trans_inv, map, offsets,
	&_child_norms, &_norms, _assign, &res->res_map, &res->offsets, nt );
	//CHECK_MAPS(res);

	if(map==&ext_map) free(ext_map.pixels);
	return n;
	}

	res_scale new_pyramid_level(int f, int psize)
	{
	res_scale res = {0}; // initialize everything to 0/NULL
	res.f = f; // subsampling factor with respect to original image size
	res.patch_size = psize; // patch size in original image coordinates
	return res;
	}

	// Compute the multi-scale pyramid response
	void compute_matching_pyr( float_layers* source, float_layers* target, const dm_params_t* params,
	matching_pyramid_t& res_maps )
	{
	const int src_shape[2] = {source->tx, source->ty};
	int L = 0; // current pyramid level
	const int atomic_psize = get_atomic_patch_size( params );
	int psize = atomic_psize; // will grow by a factor 2 at each level
	int f = psize/4; // initial scaling factor

	// subsample if needed
	avgpool2( source, params );
	avgpool2( target, params );

	//hash_layers(source)
	//hash_layers(target)

	res_maps.clear();
	res_maps.push_back(new_pyramid_level(f,psize));
	res_scale child, last = &res_maps[res_maps.size()-1];

	// compute the initial patches in source image
	if( params->verbose ) std_printf("layer %d, patch_size = %dx%d\n", L, psize, psize);
	prepare_big_cells( src_shape, psize, params->overlap<L+1, 0, NULL, NULL, 0, &last->grid, NULL, NULL );
	//hash_cube(&last->grid)

	//hash_layers(source)
	convolve_atomic_patches( source, target, params, last );
	//hash_layers(&last->res_map)
	if( params->verbose )
	std_printf("remaining %ld big cells (actually, %d are unique)\n", IMG_SIZE(&last->grid), last->res_map.tz);

	// non-linear correction
	if( params->nlpow>0 )
	fastipow( &last->res_map, params->nlpow, params->n_thread );

	//hash_layers(&last->res_map)

	const int dense_step = params->subsample_ref ? 0 : psize/(1+(params->overlap<1));

	// aggregate patches for all subsequent levels
	while( 2*psize <= MIN(params->max_psize, MAX(src_shape[0], src_shape[1])) ) {
	L++;
	f *= 2;
	psize *= 2;
	res_maps.push_back(new_pyramid_level(f,psize));
	child = &res_maps[res_maps.size()-2]; // previous level
	last = &res_maps[res_maps.size()-1]; // current level
	if( params->verbose ) std_printf("layer %d, patch_size = %dx%d\n", L, psize, psize);

	// max pooling + subsampling
	//CHECK_MAPS(&child->res_map);
	last->true_shape[0] = child->true_shape[0]; // will be modified in subsampled2()
	last->true_shape[1] = child->true_shape[1];
	float_layers subs_res_map = {0};
	int_image* offsets = maxpool3_and_subsample2( &child->res_map, last->true_shape, &child->offsets,
	&subs_res_map, params->n_thread );
	//CHECK_MAPS(&subs_res_map);

	// build the set of patches at this scale
	prepare_big_cells( src_shape, psize, params->overlap<L+1, params->overlap<L,
	&child->grid, &child->norms, dense_step, &last->grid, &last->children, &last->norms );
	//DA(last->true_shape,2)
	//hash_cube(&last->grid)
	//hash_image(&last->norms)
	//hash_cube(&last->children)

	// aggregate children response maps to form parent response maps
	sparse_conv( &last->children, &child->assign, &child->norms, psize/f, &subs_res_map, offsets,
	params->n_thread, last );
	free(subs_res_map.pixels);
	free_image(offsets);
	//CHECK_MAPS(&last->res_map);
	if( params->verbose )
	std_printf("remaining %ld big cells (actually, %d are unique)\n", IMG_SIZE(&last->grid), last->res_map.tz);

	// non-linear correction
	if( params->nlpow>0 )
	fastipow(&last->res_map, params->nlpow, params->n_thread );
	//hash_layers(&last->res_map)
	}
	}


	void free_matching_pyramid( matching_pyramid_t& res_maps ) {
	unsigned int i;
	for(i=0; i<res_maps.size(); i++) {
	res_scale& level = res_maps[i];

	free(level.grid.pixels);
	free(level.norms.pixels);
	free(level.assign.pixels);
	free(level.res_map.pixels);
	free(level.max_map.pixels);
	free(level.children.pixels);
	free(level.passed.pixels);
	}
	}


	#ifdef __APPLE__
	static int arg_sort_maxima(void* arr, const void* a, const void* b) {
	float diff = ((float)arr)[5((int)a)+4] - ((float)arr)[5((int)b)+4];
	return (diff<0) - (diff>0); // descending order
	}
	#else
	static int arg_sort_maxima(const void* a, const void* b, void* arr) {
	float diff = ((float)arr)[5((int)a)+4] - ((float)arr)[5((int)b)+4];
	return (diff<0) - (diff>0); // descending order
	}
	#endif

	void reorder_rows( int_image* img, int_array* order )
	{
	assert(order->tx==img->ty);
	const int tx = img->tx;
	int_image res = image_like(int, img);

	for(int i=0; i<order->tx; i++)
	memcpy(res.pixels + itx, img->pixels+order->pixels[i]tx, tx*sizeof(int));

	free(img->pixels);
	*img = res;
	}

	// return points corresponding to patch matches
	int_image* find_optimal_matchings( matching_pyramid_t& mp, const dm_params_t* params )
	{
	const int nobordure = 0;
	int_image* maxima = NEW(int_image);
	int_array order = {0};

	if( params->maxima_mode ) { // normal process: maxima detection

	float th=0;
	int check_parents=false, check_children=false;

	float_array sc_maxima = empty_array(float,int(mp.size()));
	for(unsigned int i=0; i<mp.size(); i++) sc_maxima.pixels[i]=1; // useless but well

	_extract_maxima( mp.data(), mp.size(), &sc_maxima, th, params->min_level, params->nlpow,
	check_parents, check_children, nobordure, maxima, params->n_thread );
	free(sc_maxima.pixels);

	order = empty_array(int,maxima->ty);
	for(int i=0; i<maxima->ty; i++) order.pixels[i] = maxima->ty-1-i; // last first

	} else { // we just analyse all cells at the top level
	const float_layers* rmap = &mp[mp.size()-1].res_map;
	const int tx = rmap->tx, txy=tx*rmap->ty;
	*maxima = empty_image(int, 5, (int)LAYERS_SIZE(rmap));

	for(int i=0; i<maxima->ty; i++) {
	int* row = maxima->pixels + 5*i;
	row[0] = mp.size()-1; // pyramid level
	row[1] = i/txy; // layer number
	row[2] = i%tx; // x position
	row[3] = (i%txy)/tx; // y position
	((float*)row)[4] = rmap->pixels[i];
	}
	//hash_image(maxima)

	order = empty_array(int,maxima->ty);
	for(int i=0; i<maxima->ty; i++) order.pixels[i] = i;
	#ifdef __APPLE__
	qsort_r(order.pixels, maxima->ty, sizeof(int), maxima->pixels, arg_sort_maxima);
	#else
	qsort_r(order.pixels, maxima->ty, sizeof(int), arg_sort_maxima, maxima->pixels);
	#endif
	}

	if( params->verbose>0 )
	std_printf("found %d local matches\n",maxima->ty);

	// reorder maxima
	reorder_rows( maxima, &order );
	free(order.pixels);
	return maxima;
	}


	static inline float ptdot( const float* m, float x, float y ) {
	return xm[0] + ym[1] + m[2];
	}

	void apply_rot( float_cube* corres, float rot[6] ) {
	assert( corres->tz == 6 );
	const int nb = IMG_SIZE(corres);
	float* p = corres->pixels;

	for(int i=0; i<nb; i++) {
	// only apply to coordinates of the first image
	float x = p[0], y = p[1];
	p[0] = ptdot(rot+0, x, y);
	p[1] = ptdot(rot+3, x, y);
	p += 6;
	}
	}


	/* this function gather correspondences from each local maximum in the
	response maps
	*/
	float_image* gather_correspondences( int src_shape[2], int target_shape[2],
	matching_pyramid_t& scales, int_image* maxima,
	const dm_params_t* params, full_corres_t* corres_out )
	{
	const int step = 4*scales[0].f; // bin size
	const int n_scales = (int)scales.size();
	const int tx = maxima->tx;
	const int n_maxima = maxima->ty;

	float_cube corres0 = zeros_cube(float, (src_shape[0]+step-1)/step, (src_shape[1]+step-1)/step,6);
	float_cube corres1 = zeros_cube(float, (target_shape[0]+step-1)/step, (target_shape[1]+step-1)/step,6);

	int i;
	// allocate temporary optimization maps
	for(i=0; i<n_scales; i++) {
	long size = LAYERS_SIZE(&scales[i].res_map);
	if( params->low_mem && size > 1000003 ) size = 1000003; // big prime
	assert( size <= 2147483647 \|\| !"try using -mem parameter");
	scales[i].passed = zeros_array(float, (int)size);
	}

	#if defined(USE_OPENMP)
	#pragma omp parallel for schedule(static,1) num_threads(params->n_thread)
	#endif
	for(i=0; i<n_maxima; i++) {
	if(params->verbose && i%100==0) std_printf("\rgathering correspondences %d%%...",100*i/n_maxima);
	int* m = maxima->pixels + tx*i;
	int level = m[0], num_map = m[1];
	int x = m[2], y = m[3];
	assert(level<n_scales);

	if( scales[level].offsets.pixels ) {
	// add offset to form real image coordinates
	x += scales[level].offsets.pixels[2*num_map+0];
	y += scales[level].offsets.pixels[2*num_map+1];
	}

	if( params->scoring_mode ) // new mode
	_argmax_correspondences( scales.data(), level, num_map, x, y, ((float*)m)[4],
	&corres0, step, &corres1, step, i );
	else // old iccv mode
	_argmax_correspondences_v1( scales.data(), level, num_map, x, y, m[0]((float)m)[4],
	&corres0, step, &corres1, step, i );
	}

	// free optimization maps
	for(i=0; i<n_scales; i++) {
	free( scales[i].passed.pixels );
	scales[i].passed.pixels = NULL;
	}

	if(params->verbose) std_printf("\n");

	if( params->rot45 ) { // rectify correspondences
	assert( corres_out );
	apply_rot( &corres0, corres_out->rot );
	apply_rot( &corres1, corres_out->rot );
	}

	// keep only reciprocal matches
	int nres;
	float* corres = _intersect_corres( &corres0, &corres1, &nres );
	float_image* res = NEW(float_image);
	*res = (float_image){corres, 6, nres};

	if( corres_out == NULL ) {
	free(corres0.pixels);
	free(corres1.pixels);
	}
	else { // save unfiltered correspondences
	corres_out->corres0 = corres0;
	corres_out->corres1 = corres1;
	}
	return res;
	}


	void eye_rot3x3( float rot[6] ) {
	memset( rot, 0, 6*sizeof(float));
	rot[0] = rot[4] = 1;
	}

	inline float bilinear_interp(const float* img, const int tx, const int ty, float x, float y ) {
	if( x < 0 \|\| x+1.001 >= tx ) return 0; // outside
	if( y < 0 \|\| y+1.001 >= ty ) return 0; // outside
	int ix = int(x);
	int iy = int(y);
	img += ix + iy*tx; // move pointer
	float rx = x - ix;
	float ry = y - iy;
	return (1-ry)((1-rx)img[0] + rx*img[1]) +
	ry ((1-rx)img[tx]+ rx*img[tx+1]);
	}

	void scale_rot3x3( float rot[6], float sc ) {
	for(int i=0; i<6; i++)
	rot[i] *= sc;
	}

	void inv_rot3x3( float rot[6], float res[6] ) {
	assert( fabs((rot[0]rot[4] - rot[1]rot[3]) - 1) < 1e-6 );
	// because rot is unitary, invert == transpose
	res[0] = rot[0];
	res[1] = rot[3];
	res[3] = rot[1];
	res[4] = rot[4];
	res[2] = -rot[2]rot[0] - rot[5]rot[3];
	res[5] = -rot[2]rot[1] - rot[5]rot[4];
	}



	// rotate a descriptor HOG image by a given angle
	float_layers* rotate45( float_layers* hog, const dm_params_t* params, full_corres_t* corres_out ) {
	assert( corres_out ); // we need it to write rot !
	const int patch_size = get_atomic_patch_size( params );
	const int n_rot45 = params->rot45;

	if( (n_rot45 % 8) == 0 ) { // nothing to do
	eye_rot3x3( corres_out->rot );
	return hog;
	}
	const int tx = hog->tx;
	const int ty = hog->ty;

	// rotation matrix
	float angle = n_rot45 * M_PI / 4;
	float c = cos(angle), s = sin(angle);
	float rot[6] = {c, -s, 0, s, c, 0};
	// pt_in_original_image = rot * pt_in_rotated_image

	// determine center of rotation before
	float cx_before = tx/2.0;
	float cy_before = ty/2.0;
	// determine center of rotation after
	float corners[2][4] = {{0, (float)tx, (float)tx, 0}, {0, 0, (float)ty, (float)ty}};
	for(int i=0; i<4; i++) { // rotate corners
	float x = corners[0][i], y = corners[1][i];
	corners[0][i] = ptdot(rot+0, x, y);
	corners[1][i] = ptdot(rot+3, x, y);
	}
	int rot_size[2] = {int(0.5 + max_array_f(corners[0], 4) - min_array_f(corners[0], 4)),
	int(0.5 + max_array_f(corners[1], 4) - min_array_f(corners[1], 4)) };
	get_source_shape( rot_size[0], rot_size[1], patch_size, rot_size );
	float cx_after = rot_size[0]/2.0;
	float cy_after = rot_size[1]/2.0;
	// compute the translation
	rot[2] = cx_before - ptdot(rot+0, cx_after, cy_after);
	rot[5] = cy_before - ptdot(rot+3, cx_after, cy_after);

	// create result
	assert( hog->tz == 9 );
	float_layers* rot_hog = NEW(float_layers);
	*rot_hog = empty_layers(float, rot_size[0], rot_size[1], 9);

	for(int c=0; c<hog->tz; c++) {
	const int src_c = (c<8) ? int((c+n_rot45+256)%8) : c; // roll channels except for last one (see hog.h)
	const float* f = hog->pixels + src_c * IMG_SIZE(hog);
	float* p = rot_hog->pixels + c * IMG_SIZE(rot_hog);

	for(int y=0; y<rot_size[1]; y++)
	for(int x=0; x<rot_size[0]; x++) {
	float rx = ptdot( rot+0, x, y);
	float ry = ptdot( rot+3, x, y);

	*p++ = bilinear_interp(f, tx, ty, rx, ry );
	}
	}

	// output inverted rot
	memcpy( corres_out->rot, rot, 6*sizeof(float) );

	return rot_hog;
	}


	// set default parameters
	void set_default_dm_params( dm_params_t* params )
	{
	// pixel descriptor params
	set_default_desc_params( &params->desc_params );

	// general parameters
	params->prior_img_downscale = 1; // resolution R = 1/2^downscale, default = 1/2
	params->rot45 = 0; // don't rotate the first image
	params->overlap = 999; // don't use overlapping patches
	params->subsample_ref = false; // don't subsample patches in reference image (=first image)
	params->nlpow = 1.4;
	params->ngh_rad = 0; // no limit by default
	params->maxima_mode = 0; // don't use maxima, just start from all top patches
	params->min_level = 2; // useless
	params->max_psize = 999; // maximum patch size
	params->low_mem = true; // optimize mem but then results are slightly unstable/non-reproducible
	params->verbose = 0;
	params->scoring_mode = 1; // improved scoring scheme
	params->n_thread = 1; // no multithreading by default
	}


	// main function
	float_image* deep_matching( image_t* img0, image_t* img1, const dm_params_t* params, full_corres_t* corres_out )
	{
	cout << "###########################################################" << endl;
	// verify parameters
	assert(between(0,params->prior_img_downscale,3));
	assert(between(0,params->overlap,999));
	assert(between(0,params->subsample_ref,1));
	assert(between(0.1,params->nlpow,10));
	assert(between(0,params->ngh_rad,1<<16));
	assert(between(0,params->maxima_mode,1));
	assert(between(0,params->min_level,4));
	assert(between(0,params->low_mem,1));
	assert(between(0,params->scoring_mode,1));
	assert(between(0,params->verbose,10));
	assert(between(1,params->n_thread,128));

	cout << "###########################################################" << endl;

	// extract pixel descriptors
	float_layers source, target;
	extract_image_desc( img0, img1, params, &source, &target );
	if( corres_out ) // the first image is rotated
	source = rotate45( source, params, corres_out );
	int src_shape[2] = {source->tx, source->ty};
	assert( LAYERS_SIZE(source) > 0 );
	int target_shape[2] = {target->tx, target->ty};
	assert( LAYERS_SIZE(target) > 0 );
	cout << "###########################################################" << endl;

	//hash_layers(source)
	//hash_layers(target)

	// compute local matchings
	matching_pyramid_t matching_pyr;
	compute_matching_pyr( source, target, params, matching_pyr );
	free_layers(source);
	free_layers(target);
	cout << "###########################################################" << endl;

	//hash_layers(&matching_pyr[matching_pyr.size()-1].res_map);

	// find optmial matchings (maxima)
	int_image* maxima = find_optimal_matchings(matching_pyr, params);
	cout << "###########################################################" << endl;

	//hash_image(maxima);

	// select the best displacements (maxpool merge)
	float_image* corres = gather_correspondences( src_shape, target_shape, matching_pyr, maxima, params, corres_out );
	cout << "###########################################################" << endl;

	//hash_image(corres);

	// free everything
	free_matching_pyramid(matching_pyr);
	free_layers(maxima);

	return corres;
	}


	void swap_first_second_img( float_cube* corres ) {
	assert( corres->tz == 6 );
	const int nb = IMG_SIZE(corres);
	float* p = corres->pixels;

	for(int i = 0; i < nb; i++) {
	float a = p[0];
	float b = p[1];
	float c = p[2];
	float d = p[3];
	*p++ = c;
	*p++ = d;
	*p++ = a;
	*p++ = b;
	p += 2;
	}
	}

	void rescale_corres( float_cube* corres, float f0, float f1, int code ) {
	assert( corres->tz == 6 );
	const int nb = IMG_SIZE(corres);
	float* p = corres->pixels;

	for(int i = 0; i < nb; i++) {
	p[0] *= f0;
	p[1] *= f0;
	p[2] *= f1;
	p[3] *= f1;
	p[5] = code;
	p += 6;
	}
	}

	// set default parameters
	void set_default_scalerot_params( scalerot_params_t* params ) {
	params->fast = true;
	params->min_sc0 = 0; // scale = 2^(-0/2) = 1
	params->max_sc0 = 5; // scale = 2^(-5/2) = 0.176
	params->min_sc1 = 0;
	params->max_sc1 = 5;
	params->min_rot = 0; // rot = 0*45 = 0
	params->max_rot = 8; // rot = 8*45 = 360
	}


	// main function for scale/rotation invariant version
	float_image* deep_matching_scale_rot( image_t* img0, image_t* img1, dm_params_t* params,
	const scalerot_params_t* sr_params ) {
	// verify parameters
	assert(sr_params->min_sc0 < sr_params->max_sc0);
	assert(sr_params->min_sc1 < sr_params->max_sc1);
	assert(between(0, sr_params->min_sc0, 5));
	assert(between(0, sr_params->max_sc0, 5));
	assert(between(0, sr_params->min_sc1, 5));
	assert(between(0, sr_params->max_sc1, 5));
	assert(sr_params->min_rot >= 0);
	assert(between(1,sr_params->max_rot - sr_params->min_rot, 8));

	// init shape
	const int psize = get_atomic_patch_size(params);
	int imshape0[2];
	get_source_shape( img0->width, img0->height, psize, imshape0 );
	int imshape1[2] = {img1->width, img1->height};

	// check dm params to ensure everything goes fine from now on
	#define mean_dim(shape) ((shape[0] + shape[1])/2)
	params->max_psize = MIN(mean_dim(imshape0), mean_dim(imshape1));
	const int verbose = params->verbose;
	params->verbose = MAX(0, verbose - 1); // decrease for inner deepmatchings

	// prepare output
	const int step0 = psize/2;
	const int step1 = psize/2;
	float_cube all_corres0 = zeros_cube(float, (imshape0[0]+step0/2-1)/step0, (imshape0[1]+step0/2-1)/step0, 6);
	float_cube all_corres1 = zeros_cube(float, (imshape1[0]+step1/2-1)/step1, (imshape1[1]+step1/2-1)/step1, 6);
	full_corres_t out;

	const int NS = 5;
	image_t *scaled_images1[NS] = {NULL};

	// loop over all scale*rot combinations
	for(int sc0 = sr_params->min_sc0;
	sc0 < sr_params->max_sc0;
	sc0++) {
	const float scale0 = pow(2, -0.5*sc0 ); // scale factor for img0
	assert( scale0<=1 && sc0<5 );
	image_t* scaled_img0 = ( scale0 >= 1 ) ? img0 :
	image_resize_bilinear_scale( img0, scale0 );

	for(int sc1 = sr_params->min_sc1;
	sc1 < sr_params->max_sc1;
	sc1++) {
	const float scale1 = pow(2, -0.5*sc1 ); // scale factor for img1
	assert( scale1<=1 && sc1<5 );
	// optimization, deactivate only if eg. both images are blurry
	if( sr_params->fast && !(scale0==1 \|\| scale1==1)) continue;

	image_t* scaled_img1 = scaled_images1[sc1 - sr_params->min_sc1];
	if( scaled_img1 == NULL ) {
	scaled_img1 = ( scale1 >= 1 ) ? img1 :
	image_resize_bilinear_scale( img1, scale1 );
	// remember result
	scaled_images1[sc1 - sr_params->min_sc1] = scaled_img1;
	}

	for(int rotation = sr_params->min_rot;
	rotation < sr_params->max_rot;
	rotation++) {
	assert( rotation >= 0 );
	const int rot_scale_code = 8(sc15+sc0) + (rotation%8); // cannot be negative, because of bin count

	if( verbose )
	std_printf( "processing scale = (x%g, x%g) + rotation = %d deg (code %d)...\n",
	scale0, scale1, 45*rotation, rot_scale_code);

	float rot0[6], rot1[6];

	// compute correspondences with rotated+scaled image
	#define max_dim(img) MAX(img->width, img->height)
	if( max_dim(scaled_img0) >= max_dim(scaled_img1) ) { // first image is always the largest
	params->rot45 = rotation;

	float_image* corres = deep_matching(scaled_img0, scaled_img1, params, &out );
	free_image( corres ); // we don't care

	inv_rot3x3(out.rot, rot0);
	eye_rot3x3(rot1);

	} else { // scaled_img1 is larger
	params->rot45 = -rotation;

	float_image* corres = deep_matching(scaled_img1, scaled_img0, params, &out );
	free_image( corres ); // we don't care

	// swap first and second image coordinates
	memswap( &out.corres0, &out.corres1, sizeof(float_cube) );
	swap_first_second_img( &out.corres0 );
	swap_first_second_img( &out.corres1 );

	inv_rot3x3(out.rot, rot1);
	eye_rot3x3(rot0);
	}

	// change scale of correspondences
	rescale_corres( &out.corres0, 1/scale0, 1/scale1, rot_scale_code );
	rescale_corres( &out.corres1, 1/scale0, 1/scale1, rot_scale_code );
	scale_rot3x3(rot0, scale0);
	scale_rot3x3(rot1, scale1);

	// merge correspondences in the reference frame
	merge_corres( rot0, rot1,
	psize, psize, &out.corres0, &out.corres1, 2,
	step0, step1, &all_corres0, &all_corres1 ); // finer grid for merge

	free(out.corres0.pixels);
	free(out.corres1.pixels);
	}
	}

	// free memory
	if( img0 != scaled_img0 )
	image_delete( scaled_img0 );
	}

	// final intersection
	int nres;
	float* corres = _intersect_corres( &all_corres0, &all_corres1, &nres );
	float_image* res = NEW(float_image);
	*res = (float_image){corres, 6, nres};

	// free memory
	for(int i=0; i<NS; i++)
	if( scaled_images1[i] != img1 )
	image_delete( scaled_images1[i] );
	free(all_corres0.pixels);
	free(all_corres1.pixels);

	return res;
	}

deep_matching.cppNo OneTemporaryActions

File Metadata

deep_matching.cppView Options

Event Timeline

deep_matching.cpp
No OneTemporary
Actions

deep_matching.cpp
View Options