Post date: Sep 09, 2015 4:52:16 PM
I finished the code for the WF ABC simulations to infer selection coefficients for the seed beetles. The source code is here /home/zgompert/Documents/programs/beetleAbc and in CVS, and the func.C file is pasted below. Right now I am running the lentil adaptation scenario with the 673 loci with exceptional change on lentil. The results will be here /labs/evolution/projects/cmaclentil/popgen/abc/sims and there will be 1 file per SNP with 2 million simulations. Here is an example command:
~/bin/beetleABC -i /labs/evolution/projects/cmaclentil/popgen/abc/pLentilSnps.txt -l 672 -n 2000000 -v 0.15 -s 1 -m 1 > abcout_lentilSnp672.txt
Here is the source code for func.C:
#include <iostream>
#include <sstream>
#include <fstream>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_statistics_double.h>
#include <gsl/gsl_sf_gamma.h>
#include <float.h>
#include <math.h>
#include "beetle.H"
using namespace std;
// print software usage
void usage(char * name){
fprintf(stdout,"\n%s version %s\n\n", name, VERSION);
fprintf(stdout, "Usage: beetleABC -i infile [options]\n");
fprintf(stdout, "-i Infile with allele frequency data\n");
fprintf(stdout, "-l Number of SNP to analyze [0]\n");
fprintf(stdout, "-n Number of ABC simulations [1000]\n");
fprintf(stdout, "-v SD for slab mixture prior on s [0.1]\n");
fprintf(stdout, "-s Scenario, 1 = lentil adaptation, 2 = tradeoffs [1]\n");
fprintf(stdout, "-m Model, 1 = ongoing or 2 = rapid adaptation [1]\n");
fprintf(stdout, "-t Time (generations) for rapid adaptation model [8]\n");
exit(1);
}
// ------ Functions for input and output ---------------
// read Ne and SNP allele frequency
void getdata(string file, dataset * data){
int i, j;
double x;
string line, element;
ifstream infile;
istringstream stream;
// open infile
infile.open(file.c_str());
if (!infile){
cerr << "Cannot open file " << file << endl;
exit(1);
}
// read Ne estimates
getline(infile, line); // data for one locus
stream.str(line);
for(j=0; j<(data->N+1); j++){
stream >> element;
x = atof(element.c_str());
gsl_vector_set(data->varNe, j, x);
}
stream.clear();
// read and store M14 allele freq. for SNP l
for(i=0; i<=data->snp; i++){
getline(infile, line); // working through file until focal SNP
}
stream.str(line);
stream.clear();
// allele frequency
stream >> element; // locus id
stream >> element;
data->m14p = atof(element.c_str());
infile.close();
}
// wrapper function for WF ABC simulations
void wfsim(dataset * data, parameters * parm){
int j;
// sample parameters from prior
sampleparms(data, parm);
// simulate model
if (data->scenario == 1) // lentil adaptation
sim(data, parm);
else if(data->scenario == 2) // tradeoffs
simrev(data, parm);
// print summary statistics
cout << parm->mod;
for(j=0; j<data->N; j++){
cout << " " << gsl_vector_get(parm->s, j);
}
for(j=0; j<data->N; j++){
cout << " " << gsl_vector_get(parm->p, j);
}
cout << endl;
}
// sample model (constrained vs. unconstrained) and selection coefficients from priors
void sampleparms(dataset * data, parameters * parm){
int j;
double u, s;
double spike = 0.00165; // s dominates when >> 1/2N, so SD = 1/(2*301.4) or about 1/(2*Ne)
// +- 0.00497 is 3 sd
u = gsl_ran_flat(r, 0, 1);
// constrained selection with lentil-adaptation
if (u < 0.5 && data->scenario == 1){
parm->mod = 0; // constrain s to be same for all lines
u = gsl_ran_flat(r, 0, 1); // spike or slab
if (u < 0.5)
s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with slab SD
else
s = gsl_ran_gaussian(r, spike); // 0-centered normal with spike SD
for(j=0; j<data->N; j++){
gsl_vector_set(parm->s, j, s);
}
}
// constrained selection tradeoffs // COULD GET RID OF THIS ONE //
else if (u < 0.5 && data->scenario == 2){
parm->mod = 0; // constrain s to be same for all L lines
u = gsl_ran_flat(r, 0, 1); // spike or slab
if (u < 0.5)
s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with slab SD
else
s = gsl_ran_gaussian(r, spike); // 0-centered normal with spike SD
for(j=0; j<3; j++){ // only loop through L lines
gsl_vector_set(parm->s, j, s);
}
// noew loop through LR lines
for(j=3; j<data->N; j++){
u = gsl_ran_flat(r, 0, 1); // spike or slab
if (u < 0.5)
s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with sd = sigma
else
s = gsl_ran_gaussian(r, spike);
gsl_vector_set(parm->s, j, s);
}
}
// unconstrained selection either scenario
else{
parm->mod = 1; // s varies by line
for(j=0; j<data->N; j++){
u = gsl_ran_flat(r, 0, 1); // spike or slab
if (u < 0.5)
s = gsl_ran_gaussian(r, parm->sigma); // 0-centered normal with sd = sigma
else
s = gsl_ran_gaussian(r, spike);
gsl_vector_set(parm->s, j, s);
}
}
}
// simulate evolution according to a W-F model with selection, with reversion lines
void simrev(dataset * data, parameters * parm){
int j, i;
double panc[4];
double prev[3] = {0, 0, 0}; // starting p for reversion lines
double Ne = gsl_vector_get(data->varNe, 0); // Ne for M line
double tm[3] = {104, 3, 24}; // generations back to the split with each lentil line; back to L3, farther back to L2 (3+104 = 107), and farther back to L1 (24 + 107 = 131)
double tl[3] = {100, 87, 85}; // generations for L1, L2, and L3
double tlsplit[3] = {62, 48, 45}; // generation at which L and LR split
double tr[3] = {46, 45, 45}; // generations for reversion lines
double x, alpha, beta, fst;
double p, pexp;
// simulate P back to ancestors
panc[0] = data->m14p;
for(j=0; j<3; j++){
x = (1-1/(2*Ne));
fst = 1 - pow(x, tm[j]);
alpha = panc[j] * ((1-fst)/fst);
beta = (1-panc[j]) * ((1-fst)/fst);
panc[j+1] = gsl_ran_beta(r, alpha, beta);
}
// simulate P forward in L1
p = panc[3];
Ne = gsl_vector_get(data->varNe, 1);
for(i=0; i<tl[0]; i++){
if (data->model == 1 || i < data->time)
pexp = calcDp(p, gsl_vector_get(parm->s, 0));
else // adaptation is complete in model 2 at this point
pexp = p;
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
if (i == tlsplit[0])// LR splits from L
prev[0] = p;
}
gsl_vector_set(parm->p, 0, p); // simulated allele freq for L1
// simulate P forward in L2
p = panc[2];
Ne = gsl_vector_get(data->varNe, 2);
for(i=0; i<tl[1]; i++){
if (data->model == 1 || i < data->time)
pexp = calcDp(p, gsl_vector_get(parm->s, 1));
else // adaptation is complete in model 2 at this point
pexp = p;
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
if (i == tlsplit[1])// LR splits from L
prev[1] = p;
}
gsl_vector_set(parm->p, 1, p); // simulated allele freq for L2
// simulate P forward in L3
p = panc[1];
Ne = gsl_vector_get(data->varNe, 3);
for(i=0; i<tl[2]; i++){
if (data->model == 1 || i < data->time)
pexp = calcDp(p, gsl_vector_get(parm->s, 2));
else // adaptation is complete in model 2 at this point
pexp = p;
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
if (i == tlsplit[0])// LR splits from L
prev[2] = p;
}
gsl_vector_set(parm->p, 2, p); // simulated allele freq for L3
// reversion lines
// simulate P forward for L1R
p = prev[0];
Ne = gsl_vector_get(data->varNe, 4);
for(i=0; i<tr[0]; i++){
pexp = calcDp(p, gsl_vector_get(parm->s, 3));
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
}
gsl_vector_set(parm->p, 3, p); // simulated allele freq for L1R
// simulate P forward for L2R
p = prev[1];
Ne = gsl_vector_get(data->varNe, 5);
for(i=0; i<tr[1]; i++){
pexp = calcDp(p, gsl_vector_get(parm->s, 4));
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
}
gsl_vector_set(parm->p, 4, p); // simulated allele freq for L2R
// simulate P forward for L3R
p = prev[2];
Ne = gsl_vector_get(data->varNe, 6);
for(i=0; i<tr[2]; i++){
pexp = calcDp(p, gsl_vector_get(parm->s, 5));
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
}
gsl_vector_set(parm->p, 5, p); // simulated allele freq for L3R
}
// simulate evolution according to a W-F model with selection
void sim(dataset * data, parameters * parm){
int j, i;
double panc[4];
double Ne = gsl_vector_get(data->varNe, 0); // Ne for M line
double tm[3] = {104, 3, 24}; // generations back to the split with each lentil line; back to L3, farther back to L2 (3+104 = 107), and farther back to L1 (24 + 107 = 131)
double tl[3] = {100, 87, 85}; // generations for L1, L2, and L3
double x, alpha, beta, fst;
double p, pexp;
// simulate P back to ancestors
panc[0] = data->m14p;
for(j=0; j<3; j++){
x = (1-1/(2*Ne));
fst = 1 - pow(x, tm[j]);
alpha = panc[j] * ((1-fst)/fst);
beta = (1-panc[j]) * ((1-fst)/fst);
panc[j+1] = gsl_ran_beta(r, alpha, beta);
}
// simulate P forward in L1
p = panc[3];
Ne = gsl_vector_get(data->varNe, 1);
for(i=0; i<tl[0]; i++){
if (data->model == 1 || i < data->time)
pexp = calcDp(p, gsl_vector_get(parm->s, 0));
else // adaptation is complete in model 2 at this point
pexp = p;
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
}
gsl_vector_set(parm->p, 0, p); // simulated allele freq for L1
// simulate P forward in L2
p = panc[2];
Ne = gsl_vector_get(data->varNe, 2);
for(i=0; i<tl[1]; i++){
if (data->model == 1 || i < data->time)
pexp = calcDp(p, gsl_vector_get(parm->s, 1));
else // adaptation is complete in model 2 at this point
pexp = p;
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
}
gsl_vector_set(parm->p, 1, p); // simulated allele freq for L2
// simulate P forward in L3
p = panc[1];
Ne = gsl_vector_get(data->varNe, 3);
for(i=0; i<tl[2]; i++){
if (data->model == 1 || i < data->time)
pexp = calcDp(p, gsl_vector_get(parm->s, 2));
else // adaptation is complete in model 2 at this point
pexp = p;
p = gsl_ran_binomial(r, pexp, (uint) (2 * Ne))/(2.0 * Ne);
}
gsl_vector_set(parm->p, 2, p); // simulated allele freq for L3
}
// calculated expected change in allele freq. by selection
double calcDp(double p, double s){
double X, Y2, Z;
double wbar, w[3];
double h = 0.5;
double dp;
X = pow(p, 2);
Y2 = 2 * p * (1-p);
Z = pow(1-p,2);
// w11 = fitness X = 1+s, w12 = fitness 2Y = 1+sh, w22 = fitness Z = 1
w[0] = 1 + s;
w[1] = 1 + s * h;
w[2] = 1;
wbar = X * w[0] + Y2 * w[1] + Z * w[2];
dp = (w[0] * X + w[1] * 0.5 * Y2 - p * wbar)/wbar;
return (p + dp);
}