me.c

x264_me_search_ref

Called by

    • x264_mb_analyse_inter_p8x8_mixed_ref

    • x264_mb_analyse_inter_b16x16

    • x264_mb_analyse_inter_p16x16

    • x264_me_search

      • x264_slicetype_mb_cost (slicetype.c)

      • x264_mb_analyse_inter_p8x8

      • x264_mb_analyse_inter_p16x8

      • x264_mb_analyse_inter_p8x16

      • x264_mb_analyse_inter_p4x4

      • x264_mb_analyse_inter_p8x4

      • x264_mb_analyse_inter_p4x8

      • x264_mb_analyse_inter_b16x8

      • x264_mb_analyse_inter_b8x16

      • x264_mb_analyse_inter_b8x8

Parameters

    • x264_t *h

    • x264_me_t *m

    • int16_t (*mvc)[2]

    • int i_mvc

    • int *p_halfpel_thresh

Initialize

const int bw = x264_pixel_size[m->i_pixel].w;

const int bh = x264_pixel_size[m->i_pixel].h;

const int i_pixel = m->i_pixel;

int i_me_range = h->param.analyse.i_me_range; //gets "Maximum motion vector search range." Set as --merange. default is 16

int bmx, bmy, bcost;

int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;

int omx, omy, pmx, pmy;

uint8_t *p_fref = m->p_fref[0];

DECLARE_ALIGNED_16( uint8_t pix[16*16] );

int i, j;

int dir;

int costs[6];

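int mv_x_min = h->mb.mv_min_fpel[0]; //min/max allowed MV per axis, in full-pel units: tested by CHECK_MVRANGE below, and multiplied by 4 when clipping the quarter-pel MVP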

int mv_y_min = h->mb.mv_min_fpel[1];

int mv_x_max = h->mb.mv_max_fpel[0];

int mv_y_max = h->mb.mv_max_fpel[1];

#define CHECK_MVRANGE(mx,my) ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max )

const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];

const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

bmx = x264_clip3( m->mvp[0], mv_x_min*4, mv_x_max*4 ); //make sure bmx falls between mv_x_min*4 and mv_x_max*4

bmy = x264_clip3( m->mvp[1], mv_y_min*4, mv_y_max*4 );

pmx = ( bmx + 2 ) >> 2;

pmy = ( bmy + 2 ) >> 2;

bcost = COST_MAX;

Prediction

/* check the MVP */ //find the cost of the MVP (the predicted motion vector, rounded to full-pel as pmx/pmy)

// COST_MV( pmx, pmy ); //fully expands to:

{

int cost = h->pixf.fpelcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE,

&p_fref[(pmy)*m->i_stride[0]+(pmx)], m->i_stride[0] )

+ (p_cost_mvx[(pmx)<<2] + p_cost_mvy[(pmy)<<2]);

if((cost)<(bcost))

{

(bcost)=(cost);

(bmx)=(pmx);

(bmy)=(pmy);

};

}

/* Because we are rounding the predicted motion vector to fullpel, there will be

* an extra MV cost in 15 out of 16 cases. However, when the predicted MV is

* chosen as the best predictor, it is often the case that the subpel search will

* result in a vector at or next to the predicted motion vector. Therefore, it is

* sensible to remove the cost of the MV from the rounded MVP to avoid unfairly

* biasing against use of the predicted motion vector. */

bcost -= BITS_MVD( pmx, pmy );

for( i = 0; i < i_mvc; i++ )

{

int mx = (mvc[i][0] + 2) >> 2;

int my = (mvc[i][1] + 2) >> 2;

if( (mx | my) && ((mx-bmx) | (my-bmy)) )

{

mx = x264_clip3( mx, mv_x_min, mv_x_max );

my = x264_clip3( my, mv_y_min, mv_y_max );

COST_MV( mx, my );

}

}

Motion Search

COST_MV( 0, 0 );

Diamond Search (radius 1) (aka Gradient Descent)

/* diamond search, radius 1 */

i = 0;

do

{

//DIA1_ITER( bmx, bmy );

{

omx = bmx; omy = bmy;

{

uint8_t *pix_base = p_fref + omx + omy*m->i_stride[0];

h->pixf.fpelcmp_x4[i_pixel]( m->p_fenc[0],

pix_base + (0) + (-1)*m->i_stride[0],

pix_base + (0) + (1)*m->i_stride[0],

pix_base + (-1) + (0)*m->i_stride[0],

pix_base + (1) + (0)*m->i_stride[0],

m->i_stride[0], costs );

costs[0] += (p_cost_mvx[(omx+(0))<<2] + p_cost_mvy[(omy+(-1))<<2]);

costs[1] += (p_cost_mvx[(omx+(0))<<2] + p_cost_mvy[(omy+(1))<<2]);

costs[2] += (p_cost_mvx[(omx+(-1))<<2] + p_cost_mvy[(omy+(0))<<2]);

costs[3] += (p_cost_mvx[(omx+(1))<<2] + p_cost_mvy[(omy+(0))<<2]);

if((costs[0])<(bcost))

{

(bcost)=(costs[0]);

(bmx)=(omx+(0));

(bmy)=(omy+(-1));

};

if((costs[1])<(bcost))

{

(bcost)=(costs[1]);

(bmx)=(omx+(0));

(bmy)=(omy+(1));

};

if((costs[2])<(bcost))

{

(bcost)=(costs[2]);

(bmx)=(omx+(-1));

(bmy)=(omy+(0));

};

if((costs[3])<(bcost))

{

(bcost)=(costs[3]);

(bmx)=(omx+(1));

(bmy)=(omy+(0));

};

};

}

if( (bmx == omx) & (bmy == omy) ) //we are at a minimum (i.e. none of the four neighbouring positions produced a smaller cost)

break;

if( !CHECK_MVRANGE(bmx, bmy) )

break;

} while( ++i < i_me_range );

break;

Cost Calculations

<cancan101> in the the macro COST_MV

<cancan101> cost is the sum of two parts

<cancan101> the results of the compare operation

<cancan101> and BITS_MVD

<cancan101> does BITS_MVD represent the cost of encoding the MV?

<Alex_W> yes

<cancan101> is it an estimate ?

<Dark_Shikari> for CAVLC, it is exact

<Dark_Shikari> (well, exact bit cost * lambda)

<Dark_Shikari> we have a pending patch that changes it to a different approximation which, paradoxically, is better even for CAVLC... but that's another story

<cancan101> for the purposes of implementing this simplified MEtor

<cancan101> what lamba should i use

<cancan101> lambda

<Dark_Shikari> just grab the one for, say, QP24

<Dark_Shikari> its a reasonable starting point

<cancan101> and costs for vector length:

<cancan101> bs_size_se?

<Dark_Shikari> see analyse_load_costs in analyse.c

<Dark_Shikari> a->i_lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;

<Dark_Shikari> we switched from bs_size_se to that because it gave better results

<Dark_Shikari> but you can use whatever follows that distribution of costs and is easiest for you

<cancan101> and i is the length measured in quarter pels?

<cancan101> of each component

Jan 24 19:43:56 <cancan101> Dark_Shikari: lamba = 4

Jan 24 19:44:01 <cancan101> is a good guess?

Jan 24 19:44:10 <cancan101> / appx?

Jan 24 19:44:39 <Dark_Shikari> a good starting point, sure

Jan 24 19:44:54 <cancan101> so something like:

Jan 24 19:44:54 <cancan101> inline unsigned mvCost(int dx, int dy){

Jan 24 19:44:54 <cancan101> dx = (abs(dx))<<2;

Jan 24 19:44:54 <cancan101> dy = (abs(dy))<<2;

Jan 24 19:44:54 <cancan101> int xCost = round((log2f(dx+1)*2 + 0.718f + !!dx) + .5f);

Jan 24 19:44:54 <cancan101> int yCost = round((log2f(dy+1)*2 + 0.718f + !!dy) + .5f);

Jan 24 19:44:54 <cancan101> int lambda = 4;

Jan 24 19:44:54 <cancan101> return lambda * (xCost+yCost);

Jan 24 19:44:54 <cancan101> }

Jan 24 19:45:42 <Dark_Shikari> Sure, but why not just use the tables that already exist?

Jan 24 19:45:49 <cancan101> i will

Jan 24 19:45:55 <cancan101> just seeing if there is a bug

Jan 24 19:45:57 <cancan101> somewhere

Jan 24 19:45:57 <Dark_Shikari> ah k

Jan 24 19:46:03 <cancan101> before i make things more comlicaed

Jan 24 19:46:04 <Dark_Shikari> remember you do have to init the table first, of course

Jan 24 19:46:08 <Dark_Shikari> see what lowres_context_init does in slicetype.c

Jan 24 19:46:10 <cancan101> its finding MVs

Jan 24 19:46:15 <Dark_Shikari> k

MV Storage

<Dark_Shikari> x264_frame_t->mv

<Dark_Shikari> Note these will only be valid for inter blocks

<Dark_Shikari> if you want to see how the mvs are stored, see common/macroblock.c lines 1273 through 1313

<Dark_Shikari> each macroblock has 16 MVs, each of which is a pair of 16-bit signed ints

<cancan101> 16

<cancan101> for the subpartions?

<Dark_Shikari> Yes

<Dark_Shikari> 16 are *always* stored, just for simplciity

<Dark_Shikari> obviously, if they're all the same, they'll be identical (16x16 partition)

<Dark_Shikari> cancan101: h->mb.mv

<cancan101> what is mv[0] vs mv[1]?

<Dark_Shikari> mv[list][position][x/y]

Jan 24 19:46:26 <cancan101> when its intra

Jan 24 19:46:29 <cancan101> are the Mvs =0?

Jan 24 19:46:36 <Dark_Shikari> yes

Jan 24 19:46:41 <cancan101> ok

Jan 24 19:46:41 <Dark_Shikari> that's how the spec defines it

Jan 24 19:46:47 <cancan101> i mean

Jan 24 19:46:51 <cancan101> stored in the h

Jan 24 19:46:56 <cancan101> h->...

Jan 24 19:47:11 <cancan101> wither way

Jan 24 19:47:22 <Dark_Shikari> yes

Controlling ME for Subpartitions

<cancan101> can you force x264 to only look for 16x16 MVs (i.e. no subpartions)?

<Dark_Shikari> --partitions none

<Dark_Shikari> in fact, just disabling p8x8/b8x8 is sufficient

<Dark_Shikari> as the others are intra options

<Dark_Shikari> its a command line option

<Dark_Shikari> it affects param.inter

<Dark_Shikari> param.analyse.inter I think

<cancan101> -A

<Dark_Shikari> h->param.analyse.inter yes

<Dark_Shikari> also known as -A (--analyse)

<Dark_Shikari> all aliases for the same thing

Reference Frames

<Dark_Shikari> h->mb.pic.p_fref[list][ref] is a reference frame

<cancan101> list being 1 or 0?

<Dark_Shikari> depends on which motion search you're doing

<cancan101> right

<cancan101> so 0

<Dark_Shikari> since you're doing only P-frames to start with, you can start with zero.

<cancan101> or 0/1?

<cancan101> k

<cancan101> whats h->fref0

<Dark_Shikari> oh wait, you don't want to use mb.pic

<Dark_Shikari> that's initialized in cache_load

<Dark_Shikari> you want to use h->fref0

<Dark_Shikari> h->fref0[0] is the first reference frame (and so forth)

<cancan101> i_ref0 is # of refs?

<Dark_Shikari> yes

<Dark_Shikari> in list0

Jan 24 13:55:05 <cancan101> Are the motion vectors stored in h->mb.mv measured in quarter pixel increments (ie does 1 mean .25 pel movement)?

Jan 24 14:16:23 <cancan101> Can anyone explain the h->mb.mv array

Jan 24 14:22:21 <Dark_Shikari> yes

Jan 24 14:22:25 <Dark_Shikari> they're stored as qpel

Jan 24 14:22:35 <cancan101> and every 16

Jan 24 14:22:40 <cancan101> is the 16x16 mv?

Jan 24 14:22:41 <Dark_Shikari> microchip_: its a troll. its intentional. ignore /g/

Jan 24 14:23:00 <Dark_Shikari> cancan101: if you use --partitions none, every 16th pair of int16_t values is a 16x16 MV, yes

Jan 24 14:23:08 <cancan101> for(q =0 ;q<h->sh.i_last_mb*16;q+=16 ){

Jan 24 14:23:08 <cancan101> if(h->mb.mv[0][q][0]!=0 || h->mb.mv[0][q][1]!=0)

Jan 24 14:23:08 <cancan101> printf("%i: %hi %hi\n",q>>4,h->mb.mv[0][q][0], h->mb.mv[0][q][1]);

Jan 24 14:23:08 <cancan101> }

Jan 24 14:23:14 <microchip_> lol Dark_Shikari i know, but i go there to laugh :)

Jan 24 14:23:22 <Dark_Shikari> Kov-san: I have the lossless version

Jan 24 14:23:47 <Dark_Shikari> cancan101: why are you saying "if not equal to zero"?

Jan 24 14:23:57 <cancan101> otherwise it prints a lot

Jan 24 14:24:02 <cancan101> compression

Jan 24 14:24:06 <cancan101> of ouput :-)

Jan 24 14:24:09 <Dark_Shikari> lol

Jan 24 14:24:14 <Dark_Shikari> but lots of real MVs *are* zeroes =p

Jan 24 14:24:18 <cancan101> right

Jan 24 14:24:22 <cancan101> but if i am ouputting

Jan 24 14:24:23 <Dark_Shikari> you may want to also print out whether the MB is intra

Jan 24 14:24:24 <cancan101> x264 mvs

Jan 24 14:24:25 <Dark_Shikari> =p

Jan 24 14:24:25 <LordRPI> "What's your favorite container?" brings to mind an old old joke...rnrn"What's a vagina?" "It's the box the penis comes in..."

Jan 24 14:24:28 * jreinhardt (n=jreinhar@dslb-088-064-128-139.pools.arcor-ip.net) has joined #x264

Jan 24 14:24:32 <Dark_Shikari> LordRPI: lol

Jan 24 14:24:34 <cancan101> and pyramid

Jan 24 14:24:38 <cancan101> and i say not 0 in both...

Jan 24 14:24:42 <cancan101> i can compare

Jan 24 14:24:48 <cancan101> or are some MBs

Jan 24 14:24:50 <cancan101> intra

Jan 24 14:24:51 <cancan101> within

Jan 24 14:24:56 <cancan101> a non intra frame?

Jan 24 14:24:57 <Dark_Shikari> yes, some mbs are intra

Jan 24 14:25:06 <cancan101> mmm

Jan 24 14:25:09 <Dark_Shikari> LordRPI: http://www.reddit.com/r/funny/comments/7rxyh/collection_of_totally_offensive_jokes_not_for_the/

Jan 24 14:25:24 <Dark_Shikari> cancan101: mb type is stored as well

Jan 24 14:25:35 <Dark_Shikari> IS_INTRA( mb type for a given macroblock ) will return 1 if it's intra

Jan 24 14:26:25 <Dark_Shikari> h->mb.type[i_mb_xy]

Jan 24 14:28:31 <LordRPI> oh boy :)

Jan 24 14:37:19 <deebo> are deadzones completely ignore when using trellis2?

Jan 24 14:37:24 * fbs (i=t7DS@201008140047.user.veloxzone.com.br) has joined #x264

Jan 24 14:37:27 <Dark_Shikari> no

Jan 24 14:37:33 <cancan101> also are those MVs

Jan 24 14:37:34 <Dark_Shikari> they're still used in fast pskip

Jan 24 14:37:36 <cancan101> in absolute tersm

Jan 24 14:37:39 <Dark_Shikari> cancan101: yes

Jan 24 14:37:42 <Dark_Shikari> they're not relative to mvp

Jan 24 14:37:45 <cancan101> nice

Jan 24 14:37:46 <cancan101> ok

Jan 24 14:37:47 <Dark_Shikari> mb.mvd[] is relative to mvp

Jan 24 14:37:55 <Dark_Shikari> (but is only stored when cabac is on)

Jan 24 14:38:04 <cancan101> why is that?

Jan 24 14:38:38 <Dark_Shikari> because cabac needs to store mvds for future use, cavlc doesn't

Jan 24 14:39:11 <cancan101> when using cavlc dont u encode the MVD though?

Jan 24 14:39:42 <Dark_Shikari> sure, but you don't store it

Jan 24 14:40:04 <Dark_Shikari> because you need it in cabac for context calculation for future mvs

Jan 24 14:40:06 <Dark_Shikari> you don't need it in cavlc

Jan 24 14:40:22 <Dark_Shikari> data is only stored in h->mb. if it's *needed* for some purpose

Jan 24 15:17:30 <cancan101> one thing about using the x264 downsampling code

Jan 24 15:17:33 <cancan101> is roung error

Jan 24 15:17:36 <cancan101> rounding

Jan 24 15:17:47 <cancan101> like average of 3 and 4

Jan 24 15:17:58 <cancan101> and then that with 4

Jan 24 15:18:08 <Dark_Shikari> yes, it will round up too much over time

Jan 24 15:18:14 <Dark_Shikari> no, this probably isn't a big problem in practice

Jan 24 15:18:17 <cancan101> round up or down?

Jan 24 15:18:40 <Dark_Shikari> it rounds up everywhere

Jan 24 15:18:44 <Dark_Shikari> that's what pavg does

Jan 24 15:18:54 <Dark_Shikari> H/V = (X+Y+1)>>1

Jan 24 15:19:19 <Dark_Shikari> C = (((X+Y+1)>>1 + (Z+W+1)>>1)+1)>>1

Jan 24 15:19:25 <cancan101> ah

Jan 24 15:19:27 <cancan101> +1

Jan 24 15:19:56 <cancan101> ok

Jan 24 15:20:02 <cancan101> but u dont think thats an issue?

Jan 24 15:20:13 <cancan101> cause i noticed this when i did mine in c

Jan 24 15:20:13 <Dark_Shikari> not really, since we're just using it for a motion search

Jan 24 15:20:15 <Dark_Shikari> it doesn't have to be exact

Jan 24 15:20:19 <cancan101> and sume all values in 16x16

Jan 24 15:20:24 <cancan101> rather than doing recursively

Jan 24 15:20:27 <cancan101> and sucj

Jan 24 15:20:28 <Dark_Shikari> and our vector will get refined over time as we downscale less anyways

Jan 24 15:20:36 <cancan101> fair enough

Jan 24 15:20:41 <Dark_Shikari> we can modify it later if we need to