me.c
x264_me_search_ref
Called by
x264_mb_analyse_inter_p8x8_mixed_ref
x264_mb_analyse_inter_b16x16
x264_mb_analyse_inter_p16x16
x264_me_search
x264_slicetype_mb_cost (slicetype.c)
x264_mb_analyse_inter_p8x8
x264_mb_analyse_inter_p16x8
x264_mb_analyse_inter_p8x16
x264_mb_analyse_inter_p4x4
x264_mb_analyse_inter_p8x4
x264_mb_analyse_inter_p4x8
x264_mb_analyse_inter_b16x8
x264_mb_analyse_inter_b8x16
x264_mb_analyse_inter_b8x8
Parameters
x264_t *h
x264_me_t *m
int16_t (*mvc)[2]
int i_mvc
int *p_halfpel_thresh
Initialize
/* Local state for the search. The block dimensions and the index used to pick
 * the comparison function are all taken from m->i_pixel. */
const int bw = x264_pixel_size[m->i_pixel].w; // .w entry of x264_pixel_size for this m->i_pixel
const int bh = x264_pixel_size[m->i_pixel].h; // .h entry of x264_pixel_size for this m->i_pixel
const int i_pixel = m->i_pixel;
int i_me_range = h->param.analyse.i_me_range; // "Maximum motion vector search range"; set with --merange, default is 16
int bmx, bmy, bcost; // best vector found so far (x, y) and its cost
int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX; // not used in the part of the function excerpted in these notes
int omx, omy, pmx, pmy; // omx/omy: centre of the current search iteration; pmx/pmy: clipped MVP after (v+2)>>2 rounding
uint8_t *p_fref = m->p_fref[0]; // reference pixels the candidates are compared against
DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int i, j;
int dir;
int costs[6]; // per-candidate costs; the diamond search in this excerpt uses entries 0..3
// Allowed motion vector window, index 0 = x and index 1 = y. Presumably in
// fullpel units ("fpel"): the values are multiplied by 4 when the MVP is clipped
// and used unscaled when the rounded candidates are clipped -- TODO confirm.
int mv_x_min = h->mb.mv_min_fpel[0];
int mv_y_min = h->mb.mv_min_fpel[1];
int mv_x_max = h->mb.mv_max_fpel[0];
int mv_y_max = h->mb.mv_max_fpel[1];
/* Non-zero when (mx,my) lies inside [mv_x_min, mv_x_max] x [mv_y_min, mv_y_max].
 * The arguments are parenthesized so that expression arguments (e.g. a ?: or a
 * comparison) expand with the intended precedence. Each argument may still be
 * evaluated twice, so do not pass expressions with side effects. */
#define CHECK_MVRANGE(mx,my) ( (mx) >= mv_x_min && (mx) <= mv_x_max && (my) >= mv_y_min && (my) <= mv_y_max )
/* MV cost tables pre-offset by the predictor: indexing these pointers with a
 * coordinate c reads m->p_cost_mv[c - mvp], i.e. the cost is looked up by the
 * difference from the MVP. */
const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 ); // make sure bmx falls between mv_x_min*4 and mv_x_max*4
bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 ); // same for y
// Add 2 and shift right by 2: divide the clipped MVP by 4, rounding to nearest.
pmx = ( bmx + 2 ) >> 2;
pmy = ( bmy + 2 ) >> 2;
bcost = COST_MAX; // any real candidate cost below COST_MAX will replace this
Prediction
/* check the MVP */ // evaluate the cost of the MVP (motion vector predictor)
// COST_MV( pmx, pmy ); // shown here fully expanded:
{
// cost = block comparison of m->p_fenc[0] against the reference at (pmx,pmy)
//        + the x and y MV cost table entries for that position (index is the coordinate << 2)
int cost = h->pixf.fpelcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE,
&p_fref[(pmy)*m->i_stride[0]+(pmx)], m->i_stride[0] )
+ (p_cost_mvx[(pmx)<<2] + p_cost_mvy[(pmy)<<2]);
// keep this candidate if it is strictly cheaper than the best so far
if((cost)<(bcost))
{
(bcost)=(cost);
(bmx)=(pmx);
(bmy)=(pmy);
};
}
/* Because we are rounding the predicted motion vector to fullpel, there will be
 * an extra MV cost in 15 out of 16 cases. However, when the predicted MV is
 * chosen as the best predictor, it is often the case that the subpel search will
 * result in a vector at or next to the predicted motion vector. Therefore, it is
 * sensible to remove the cost of the MV from the rounded MVP to avoid unfairly
 * biasing against use of the predicted motion vector. */
bcost -= BITS_MVD( pmx, pmy );
// Try each of the i_mvc candidate vectors in mvc, rounded with (v+2)>>2.
for( i = 0; i < i_mvc; i++ )
{
int mx = (mvc[i][0] + 2) >> 2;
int my = (mvc[i][1] + 2) >> 2;
// Skip the candidate when it is (0,0) or identical to the current best (bmx,bmy).
if( (mx | my) && ((mx-bmx) | (my-bmy)) )
{
// clip into the allowed MV window before evaluating
mx = x264_clip3( mx, mv_x_min, mv_x_max );
my = x264_clip3( my, mv_y_min, mv_y_max );
COST_MV( mx, my );
}
}
Motion Search
COST_MV( 0, 0 ); // evaluate the zero vector as a candidate
Diamond Search (radius 1) (aka Gradient Descent)
/* diamond search, radius 1 */
/* Each iteration evaluates the four neighbours (up, down, left, right) of the
 * current best position and moves to the cheapest one. The loop stops when no
 * neighbour is cheaper, when the new best leaves the allowed MV window, or
 * after i_me_range iterations. */
i = 0;
do
{
// DIA1_ITER( bmx, bmy ); shown here fully expanded:
{
omx = bmx; omy = bmy; // remember the centre of this iteration
{
uint8_t *pix_base = p_fref + omx + omy*m->i_stride[0];
// One call compares m->p_fenc[0] against four reference positions:
// (0,-1), (0,+1), (-1,0), (+1,0) relative to the centre; the costs[] entries
// 0..3 read below correspond to those positions in that order.
h->pixf.fpelcmp_x4[i_pixel]( m->p_fenc[0],
pix_base + (0) + (-1)*m->i_stride[0],
pix_base + (0) + (1)*m->i_stride[0],
pix_base + (-1) + (0)*m->i_stride[0],
pix_base + (1) + (0)*m->i_stride[0],
m->i_stride[0], costs );
// add the MV cost table entries for each of the four positions
costs[0] += (p_cost_mvx[(omx+(0))<<2] + p_cost_mvy[(omy+(-1))<<2]);
costs[1] += (p_cost_mvx[(omx+(0))<<2] + p_cost_mvy[(omy+(1))<<2]);
costs[2] += (p_cost_mvx[(omx+(-1))<<2] + p_cost_mvy[(omy+(0))<<2]);
costs[3] += (p_cost_mvx[(omx+(1))<<2] + p_cost_mvy[(omy+(0))<<2]);
// keep whichever neighbour is strictly cheaper than the best so far
if((costs[0])<(bcost))
{
(bcost)=(costs[0]);
(bmx)=(omx+(0));
(bmy)=(omy+(-1));
};
if((costs[1])<(bcost))
{
(bcost)=(costs[1]);
(bmx)=(omx+(0));
(bmy)=(omy+(1));
};
if((costs[2])<(bcost))
{
(bcost)=(costs[2]);
(bmx)=(omx+(-1));
(bmy)=(omy+(0));
};
if((costs[3])<(bcost))
{
(bcost)=(costs[3]);
(bmx)=(omx+(1));
(bmy)=(omy+(0));
};
};
}
if( (bmx == omx) & (bmy == omy) ) // we are at a minimum (i.e. none of the neighbours produced a smaller cost)
break;
if( !CHECK_MVRANGE(bmx, bmy) ) // the new best is outside the allowed MV window
break;
} while( ++i < i_me_range );
// NOTE(review): this break is outside the do/while; presumably it ends an
// enclosing switch case that is not part of this excerpt -- confirm against me.c.
break;
Cost Calculations
<cancan101> in the the macro COST_MV
<cancan101> cost is the sum of two parts
<cancan101> the results of the compare operation
<cancan101> and BITS_MVD
<cancan101> does BITS_MVD represent the cost of encoding the MV?
<Alex_W> yes
<cancan101> is it an estimate ?
<Dark_Shikari> for CAVLC, it is exact
<Dark_Shikari> (well, exact bit cost * lambda)
<Dark_Shikari> we have a pending patch that changes it to a different approximation which, paradoxically, is better even for CAVLC... but that's another story
<cancan101> for the purposes of implementing this simplified MEtor
<cancan101> what lamba should i use
<cancan101> lambda
<Dark_Shikari> just grab the one for, say, QP24
<Dark_Shikari> its a reasonable starting point
<cancan101> and costs for vector length:
<cancan101> bs_size_se?
<Dark_Shikari> see analyse_load_costs in analyse.c
<Dark_Shikari> a->i_lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
<Dark_Shikari> we switched from bs_size_se to that because it gave better results
<Dark_Shikari> but you can use whatever follows that distribution of costs and is easiest for you
<cancan101> and i is the length measured in quarter pels?
<cancan101> of each component
Jan 24 19:43:56 <cancan101> Dark_Shikari: lamba = 4
Jan 24 19:44:01 <cancan101> is a good guess?
Jan 24 19:44:10 <cancan101> / appx?
Jan 24 19:44:39 <Dark_Shikari> a good starting point, sure
Jan 24 19:44:54 <cancan101> so something like:
Jan 24 19:44:54 <cancan101> inline unsigned mvCost(int dx, int dy){
Jan 24 19:44:54 <cancan101> dx = (abs(dx))<<2;
Jan 24 19:44:54 <cancan101> dy = (abs(dy))<<2;
Jan 24 19:44:54 <cancan101> int xCost = round((log2f(dx+1)*2 + 0.718f + !!dx) + .5f);
Jan 24 19:44:54 <cancan101> int yCost = round((log2f(dy+1)*2 + 0.718f + !!dy) + .5f);
Jan 24 19:44:54 <cancan101> int lambda = 4;
Jan 24 19:44:54 <cancan101> return lambda * (xCost+yCost);
Jan 24 19:44:54 <cancan101> }
Jan 24 19:45:42 <Dark_Shikari> Sure, but why not just use the tables that already exist?
Jan 24 19:45:49 <cancan101> i will
Jan 24 19:45:55 <cancan101> just seeing if there is a bug
Jan 24 19:45:57 <cancan101> somewhere
Jan 24 19:45:57 <Dark_Shikari> ah k
Jan 24 19:46:03 <cancan101> before i make things more comlicaed
Jan 24 19:46:04 <Dark_Shikari> remember you do have to init the table first, of course
Jan 24 19:46:08 <Dark_Shikari> see what lowres_context_init does in slicetype.c
Jan 24 19:46:10 <cancan101> its finding MVs
Jan 24 19:46:15 <Dark_Shikari> k
MV Storage
<Dark_Shikari> x264_frame_t->mv
<Dark_Shikari> Note these will only be valid for inter blocks
<Dark_Shikari> if you want to see how the mvs are stored, see common/macroblock.c lines 1273 through 1313
<Dark_Shikari> each macroblock has 16 MVs, each of which is a pair of 16-bit signed ints
<cancan101> 16
<cancan101> for the subpartions?
<Dark_Shikari> Yes
<Dark_Shikari> 16 are *always* stored, just for simplciity
<Dark_Shikari> obviously, if they're all the same, they'll be identical (16x16 partition)
<Dark_Shikari> cancan101: h->mb.mv
<cancan101> what is mv[0] vs mv[1]?
<Dark_Shikari> mv[list][position][x/y]
Jan 24 19:46:26 <cancan101> when its intra
Jan 24 19:46:29 <cancan101> are the Mvs =0?
Jan 24 19:46:36 <Dark_Shikari> yes
Jan 24 19:46:41 <cancan101> ok
Jan 24 19:46:41 <Dark_Shikari> that's how the spec defines it
Jan 24 19:46:47 <cancan101> i mean
Jan 24 19:46:51 <cancan101> stored in the h
Jan 24 19:46:56 <cancan101> h->...
Jan 24 19:47:11 <cancan101> wither way
Jan 24 19:47:22 <Dark_Shikari> yes
Controlling ME for Subpartitions
<cancan101> can you force x264 to only look for 16x16 MVs (i.e. no subpartions)?
<Dark_Shikari> --partitions none
<Dark_Shikari> in fact, just disabling p8x8/b8x8 is sufficient
<Dark_Shikari> as the others are intra options
<Dark_Shikari> its a command line option
<Dark_Shikari> it affects param.inter
<Dark_Shikari> param.analyse.inter I think
<cancan101> -A
<Dark_Shikari> h->param.analyse.inter yes
<Dark_Shikari> also known as -A (--analyse)
<Dark_Shikari> all aliases for the same thing
Reference Frames
<Dark_Shikari> h->mb.pic.p_fref[list][ref] is a reference frame
<cancan101> list being 1 or 0?
<Dark_Shikari> depends on which motion search you're doing
<cancan101> right
<cancan101> so 0
<Dark_Shikari> since you're doing only P-frames to start with, you can start with zero.
<cancan101> or 0/1?
<cancan101> k
<cancan101> whats h->fref0
<Dark_Shikari> oh wait, you don't want to use mb.pic
<Dark_Shikari> that's initialized in cache_load
<Dark_Shikari> you want to use h->fref0
<Dark_Shikari> h->fref0[0] is the first reference frame (and so forth)
<cancan101> i_ref0 is # of refs?
<Dark_Shikari> yes
<Dark_Shikari> in list0
Jan 24 13:55:05 <cancan101> Are the motion vectors stored in h->mb.mv measured in quarter pixel increments (ie does 1 mean .25 pel movement)?
Jan 24 14:16:23 <cancan101> Can anyone explain the h->mb.mv array
Jan 24 14:22:21 <Dark_Shikari> yes
Jan 24 14:22:25 <Dark_Shikari> they're stored as qpel
Jan 24 14:22:35 <cancan101> and every 16
Jan 24 14:22:40 <cancan101> is the 16x16 mv?
Jan 24 14:22:41 <Dark_Shikari> microchip_: its a troll. its intentional. ignore /g/
Jan 24 14:23:00 <Dark_Shikari> cancan101: if you use --partitions none, every 16th pair of int16_t values is a 16x16 MV, yes
Jan 24 14:23:08 <cancan101> for(q =0 ;q<h->sh.i_last_mb*16;q+=16 ){
Jan 24 14:23:08 <cancan101> if(h->mb.mv[0][q][0]!=0 || h->mb.mv[0][q][1]!=0)
Jan 24 14:23:08 <cancan101> printf("%i: %hi %hi\n",q>>4,h->mb.mv[0][q][0], h->mb.mv[0][q][1]);
Jan 24 14:23:08 <cancan101> }
Jan 24 14:23:14 <microchip_> lol Dark_Shikari i know, but i go there to laugh :)
Jan 24 14:23:22 <Dark_Shikari> Kov-san: I have the lossless version
Jan 24 14:23:47 <Dark_Shikari> cancan101: why are you saying "if not equal to zero"?
Jan 24 14:23:57 <cancan101> otherwise it prints a lot
Jan 24 14:24:02 <cancan101> compression
Jan 24 14:24:06 <cancan101> of ouput :-)
Jan 24 14:24:09 <Dark_Shikari> lol
Jan 24 14:24:14 <Dark_Shikari> but lots of real MVs *are* zeroes =p
Jan 24 14:24:18 <cancan101> right
Jan 24 14:24:22 <cancan101> but if i am ouputting
Jan 24 14:24:23 <Dark_Shikari> you may want to also print out whether the MB is intra
Jan 24 14:24:24 <cancan101> x264 mvs
Jan 24 14:24:25 <Dark_Shikari> =p
Jan 24 14:24:25 <LordRPI> "What's your favorite container?" brings to mind an old old joke...rnrn"What's a vagina?" "It's the box the penis comes in..."
Jan 24 14:24:28 * jreinhardt (n=jreinhar@dslb-088-064-128-139.pools.arcor-ip.net) has joined #x264
Jan 24 14:24:32 <Dark_Shikari> LordRPI: lol
Jan 24 14:24:34 <cancan101> and pyramid
Jan 24 14:24:38 <cancan101> and i say not 0 in both...
Jan 24 14:24:42 <cancan101> i can compare
Jan 24 14:24:48 <cancan101> or are some MBs
Jan 24 14:24:50 <cancan101> intra
Jan 24 14:24:51 <cancan101> within
Jan 24 14:24:56 <cancan101> a non intra frame?
Jan 24 14:24:57 <Dark_Shikari> yes, some mbs are intra
Jan 24 14:25:06 <cancan101> mmm
Jan 24 14:25:09 <Dark_Shikari> LordRPI: http://www.reddit.com/r/funny/comments/7rxyh/collection_of_totally_offensive_jokes_not_for_the/
Jan 24 14:25:24 <Dark_Shikari> cancan101: mb type is stored as well
Jan 24 14:25:35 <Dark_Shikari> IS_INTRA( mb type for a given macroblock ) will return 1 if it's intra
Jan 24 14:26:25 <Dark_Shikari> h->mb.type[i_mb_xy]
Jan 24 14:28:31 <LordRPI> oh boy :)
Jan 24 14:37:19 <deebo> are deadzones completely ignore when using trellis2?
Jan 24 14:37:24 * fbs (i=t7DS@201008140047.user.veloxzone.com.br) has joined #x264
Jan 24 14:37:27 <Dark_Shikari> no
Jan 24 14:37:33 <cancan101> also are those MVs
Jan 24 14:37:34 <Dark_Shikari> they're still used in fast pskip
Jan 24 14:37:36 <cancan101> in absolute tersm
Jan 24 14:37:39 <Dark_Shikari> cancan101: yes
Jan 24 14:37:42 <Dark_Shikari> they're not relative to mvp
Jan 24 14:37:45 <cancan101> nice
Jan 24 14:37:46 <cancan101> ok
Jan 24 14:37:47 <Dark_Shikari> mb.mvd[] is relative to mvp
Jan 24 14:37:55 <Dark_Shikari> (but is only stored when cabac is on)
Jan 24 14:38:04 <cancan101> why is that?
Jan 24 14:38:38 <Dark_Shikari> because cabac needs to store mvds for future use, cavlc doesn't
Jan 24 14:39:11 <cancan101> when using cavlc dont u encode the MVD though?
Jan 24 14:39:42 <Dark_Shikari> sure, but you don't store it
Jan 24 14:40:04 <Dark_Shikari> because you need it in cabac for context calculation for future mvs
Jan 24 14:40:06 <Dark_Shikari> you don't need it in cavlc
Jan 24 14:40:22 <Dark_Shikari> data is only stored in h->mb. if it's *needed* for some purpose
Jan 24 15:17:30 <cancan101> one thing about using the x264 downsampling code
Jan 24 15:17:33 <cancan101> is roung error
Jan 24 15:17:36 <cancan101> rounding
Jan 24 15:17:47 <cancan101> like average of 3 and 4
Jan 24 15:17:58 <cancan101> and then that with 4
Jan 24 15:18:08 <Dark_Shikari> yes, it will round up too much over time
Jan 24 15:18:14 <Dark_Shikari> no, this probably isn't a big problem in practice
Jan 24 15:18:17 <cancan101> round up or down?
Jan 24 15:18:40 <Dark_Shikari> it rounds up everywhere
Jan 24 15:18:44 <Dark_Shikari> that's what pavg does
Jan 24 15:18:54 <Dark_Shikari> H/V = (X+Y+1)>>1
Jan 24 15:19:19 <Dark_Shikari> C = ((((X+Y+1)>>1) + ((Z+W+1)>>1)) + 1) >> 1
Jan 24 15:19:25 <cancan101> ah
Jan 24 15:19:27 <cancan101> +1
Jan 24 15:19:56 <cancan101> ok
Jan 24 15:20:02 <cancan101> but u dont think thats an issue?
Jan 24 15:20:13 <cancan101> cause i noticed this when i did mine in c
Jan 24 15:20:13 <Dark_Shikari> not really, since we're just using it for a motion search
Jan 24 15:20:15 <Dark_Shikari> it doesn't have to be exact
Jan 24 15:20:19 <cancan101> and sume all values in 16x16
Jan 24 15:20:24 <cancan101> rather than doing recursively
Jan 24 15:20:27 <cancan101> and sucj
Jan 24 15:20:28 <Dark_Shikari> and our vector will get refined over time as we downscale less anyways
Jan 24 15:20:36 <cancan101> fair enough
Jan 24 15:20:41 <Dark_Shikari> we can modify it later if we need to