ABC inference of selection

Post date: Sep 09, 2015 4:52:16 PM

I finished the code for the WF ABC simulations to infer selection coefficients for the seed beetles. The source code is here /home/zgompert/Documents/programs/beetleAbc and in CVS, and the func.C file is pasted below. Right now I am running the lentil adaptation scenario with the 673 loci with exceptional change on lentil. The results will be here /labs/evolution/projects/cmaclentil/popgen/abc/sims and there will be 1 file per SNP with 2 million simulations. Here is an example command:

~/bin/beetleABC -i /labs/evolution/projects/cmaclentil/popgen/abc/pLentilSnps.txt -l 672 -n 2000000 -v 0.15 -s 1 -m 1 > abcout_lentilSnp672.txt

Here is the source code for func.C:

#include <iostream>

#include <sstream>

#include <fstream>

#include <gsl/gsl_matrix.h>

#include <gsl/gsl_vector.h>

#include <gsl/gsl_rng.h>

#include <gsl/gsl_randist.h>

#include <gsl/gsl_statistics_double.h>

#include <gsl/gsl_sf_gamma.h>

#include <float.h>

#include <math.h>

#include "beetle.H"

using namespace std;

// print software usage

void usage(char * name){

fprintf(stdout,"\n%s version %s\n\n", name, VERSION);

fprintf(stdout, "Usage: beetleABC -i infile [options]\n");

fprintf(stdout, "-i Infile with allele frequency data\n");

fprintf(stdout, "-l Number of SNP to analyze [0]\n");

fprintf(stdout, "-n Number of ABC simulations [1000]\n");

fprintf(stdout, "-v SD for slab mixture prior on s [0.1]\n");

fprintf(stdout, "-s Scenario, 1 = lentil adaptation, 2 = tradeoffs [1]\n");

fprintf(stdout, "-m Model, 1 = ongoing or 2 = rapid adaptation [1]\n");

fprintf(stdout, "-t Time (generations) for rapid adaptation model [8]\n");

exit(1);

}

// ------ Functions for input and output ---------------

// read Ne and SNP allele frequency

void getdata(string file, dataset * data){

int i, j;

double x;

string line, element;

ifstream infile;

istringstream stream;

// open infile

infile.open(file.c_str());

if (!infile){

cerr << "Cannot open file " << file << endl;

exit(1);

}

// read Ne estimates

getline(infile, line); // data for one locus

stream.str(line);

for(j=0; j<(data->N+1); j++){

stream >> element;

x = atof(element.c_str());

gsl_vector_set(data->varNe, j, x);

}

stream.clear();

// read and store M14 allele freq. for SNP l

for(i=0; i<=data->snp; i++){

getline(infile, line); // working through file until focal SNP

}

stream.str(line);

stream.clear();

// allele frequency

stream >> element; // locus id

stream >> element;

data->m14p = atof(element.c_str());

infile.close();

}

// wrapper function for WF ABC simulations

void wfsim(dataset * data, parameters * parm){

int j;

// sample parameters from prior

sampleparms(data, parm);

// simulate model

if (data->scenario == 1) // lentil adaptation

sim(data, parm);

else if(data->scenario == 2) // tradeoffs

simrev(data, parm);

// print summary statistics

cout << parm->mod;

for(j=0; j<data->N; j++){

cout << " " << gsl_vector_get(parm->s, j);

}

for(j=0; j<data->N; j++){

cout << " " << gsl_vector_get(parm->p, j);

}

cout << endl;

}

// sample model (constrained vs. unconstrained) and selection coefficients from priors

void sampleparms(dataset * data, parameters * parm){

int j;

double u, s;

double spike = 0.00165; // s dominates when >> 1/2N, so SD = 1/(2*301.4) or about 1/(2*Ne)

// +- 0.00497 is 3 sd

u = gsl_ran_flat(r, 0, 1);

// constrained selection with lentil-adaptation

if (u < 0.5 && data->scenario == 1){

parm->mod = 0; // constrain s to be same for all lines

u = gsl_ran_flat(r, 0, 1); // spike or slab

if (u < 0.5)

s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with slab SD

else

s = gsl_ran_gaussian(r, spike); // 0-centered normal with spike SD

for(j=0; j<data->N; j++){

gsl_vector_set(parm->s, j, s);

}

// constrained selection tradeoffs // COULD GET RID OF THIS ONE //

else if (u < 0.5 && data->scenario == 2){

parm->mod = 0; // constrain s to be same for all L lines

u = gsl_ran_flat(r, 0, 1); // spike or slab

if (u < 0.5)

s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with slab SD

else

s = gsl_ran_gaussian(r, spike); // 0-centered normal with spike SD

for(j=0; j<3; j++){ // only loop through L lines

gsl_vector_set(parm->s, j, s);

}

// noew loop through LR lines

for(j=3; j<data->N; j++){

u = gsl_ran_flat(r, 0, 1); // spike or slab

if (u < 0.5)

s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with sd = sigma

else

s = gsl_ran_gaussian(r, spike);

gsl_vector_set(parm->s, j, s);

}

// unconstrained selection either scenario

else{

parm->mod = 1; // s varies by line

for(j=0; j<data->N; j++){

u = gsl_ran_flat(r, 0, 1); // spike or slab

if (u < 0.5)

s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with sd = sigma

else

s = gsl_ran_gaussian(r, spike);

gsl_vector_set(parm->s, j, s);

}

// simulate evolution according to a W-F model with selection, with reversion lines

void simrev(dataset * data, parameters * parm){

int j, i;

double panc[4];

double prev[3] = {0, 0, 0}; // starting p for reversion lines

double Ne = gsl_vector_get(data->varNe, 0); // Ne for M line

double tm[3] = {104, 3, 24}; // generations back to the split with each lentil line; back to L3, farther back to L2 (3+104 = 107), and farther back to L1 (24 + 107 = 131)

double tl[3] = {100, 87, 85}; // generations for L1, L2, and L3

double tlsplit[3] = {62, 48, 45}; // generation at which L and LR split

double tr[3] = {46, 45, 45}; // generations for reversion lines

double x, alpha, beta, fst;

double p, pexp;

// simulate P back to ancestors

panc[0] = data->m14p;

for(j=0; j<3; j++){

x = (1-1/(2*Ne));

fst = 1 - pow(x, tm[j]);

alpha = panc[j] * ((1-fst)/fst);

beta = (1-panc[j]) * ((1-fst)/fst);

panc[j+1] = gsl_ran_beta(r, alpha, beta);

}

// simulate P forward in L1

p = panc[3];

Ne = gsl_vector_get(data->varNe, 1);

for(i=0; i<tl[0]; i++){

if (data->model == 1 || i < data->time)

pexp = calcDp(p, gsl_vector_get(parm->s, 0));

else // adaptation is complete in model 2 at this point

pexp = p;

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

if (i == tlsplit[0])// LR splits from L

prev[0] = p;

}

gsl_vector_set(parm->p, 0, p); // simulated allele freq for L1

// simulate P forward in L2

p = panc[2];

Ne = gsl_vector_get(data->varNe, 2);

for(i=0; i<tl[1]; i++){

if (data->model == 1 || i < data->time)

pexp = calcDp(p, gsl_vector_get(parm->s, 1));

else // adaptation is complete in model 2 at this point

pexp = p;

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

if (i == tlsplit[1])// LR splits from L

prev[1] = p;

}

gsl_vector_set(parm->p, 1, p); // simulated allele freq for L2

// simulate P forward in L3

p = panc[1];

Ne = gsl_vector_get(data->varNe, 3);

for(i=0; i<tl[2]; i++){

if (data->model == 1 || i < data->time)

pexp = calcDp(p, gsl_vector_get(parm->s, 2));

else // adaptation is complete in model 2 at this point

pexp = p;

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

if (i == tlsplit[0])// LR splits from L

prev[2] = p;

}

gsl_vector_set(parm->p, 2, p); // simulated allele freq for L3

// reversion lines

// simulate P forward for L1R

p = prev[0];

Ne = gsl_vector_get(data->varNe, 4);

for(i=0; i<tr[0]; i++){

pexp = calcDp(p, gsl_vector_get(parm->s, 3));

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

}

gsl_vector_set(parm->p, 3, p); // simulated allele freq for L1R

// simulate P forward for L2R

p = prev[1];

Ne = gsl_vector_get(data->varNe, 5);

for(i=0; i<tr[1]; i++){

pexp = calcDp(p, gsl_vector_get(parm->s, 4));

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

}

gsl_vector_set(parm->p, 4, p); // simulated allele freq for L2R

// simulate P forward for L3R

p = prev[2];

Ne = gsl_vector_get(data->varNe, 6);

for(i=0; i<tr[2]; i++){

pexp = calcDp(p, gsl_vector_get(parm->s, 5));

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

}

gsl_vector_set(parm->p, 5, p); // simulated allele freq for L3R

}

// simulate evolution according to a W-F model with selection

void sim(dataset * data, parameters * parm){

int j, i;

double panc[4];

double Ne = gsl_vector_get(data->varNe, 0); // Ne for M line

double tm[3] = {104, 3, 24}; // generations back to the split with each lentil line; back to L3, farther back to L2 (3+104 = 107), and farther back to L1 (24 + 107 = 131)

double tl[3] = {100, 87, 85}; // generations for L1, L2, and L3

double x, alpha, beta, fst;

double p, pexp;

// simulate P back to ancestors

panc[0] = data->m14p;

for(j=0; j<3; j++){

x = (1-1/(2*Ne));

fst = 1 - pow(x, tm[j]);

alpha = panc[j] * ((1-fst)/fst);

beta = (1-panc[j]) * ((1-fst)/fst);

panc[j+1] = gsl_ran_beta(r, alpha, beta);

}

// simulate P forward in L1

p = panc[3];

Ne = gsl_vector_get(data->varNe, 1);

for(i=0; i<tl[0]; i++){

if (data->model == 1 || i < data->time)

pexp = calcDp(p, gsl_vector_get(parm->s, 0));

else // adaptation is complete in model 2 at this point

pexp = p;

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

}

gsl_vector_set(parm->p, 0, p); // simulated allele freq for L1

// simulate P forward in L2

p = panc[2];

Ne = gsl_vector_get(data->varNe, 2);

for(i=0; i<tl[1]; i++){

if (data->model == 1 || i < data->time)

pexp = calcDp(p, gsl_vector_get(parm->s, 1));

else // adaptation is complete in model 2 at this point

pexp = p;

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

}

gsl_vector_set(parm->p, 1, p); // simulated allele freq for L2

// simulate P forward in L3

p = panc[1];

Ne = gsl_vector_get(data->varNe, 3);

for(i=0; i<tl[2]; i++){

if (data->model == 1 || i < data->time)

pexp = calcDp(p, gsl_vector_get(parm->s, 2));

else // adaptation is complete in model 2 at this point

pexp = p;

p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);

}

gsl_vector_set(parm->p, 2, p); // simulated allele freq for L3

}

// calculated expected change in allele freq. by selection

double calcDp(double p, double s){

double X, Y2, Z;

double wbar, w[3];

double h = 0.5;

double dp;

X = pow(p, 2);

Y2 = 2 * p * (1-p);

Z = pow(1-p,2);

// w11 = fitness X = 1+s, w12 = fitness 2Y = 1+sh, w22 = fitness Z = 1

w[0] = 1 + s;

w[1] = 1 + s * h;

w[2] = 1;

wbar = X * w[0] + Y2 * w[1] + Z * w[2];

dp = (w[0] * X + w[1] * 0.5 * Y2 - p * wbar)/wbar;

return (p + dp);

}

Page updated

Google Sites

Report abuse