SINGLE POLE BALANCING EVOLUTION

genomestart 1

trait 1 0.1 0 0 0 0 0 0 0

trait 2 0.2 0 0 0 0 0 0 0

trait 3 0.3 0 0 0 0 0 0 0

node 1 0 1 3

node 2 0 1 1

node 3 0 1 1

node 4 0 1 1

node 5 0 1 1

node 6 0 0 2

node 7 0 0 2

gene 1 1 6 0.0 0 1 0 1

gene 2 2 6 0.0 0 2 0 1

gene 3 3 6 0.0 0 3 0 1

gene 1 4 6 0.0 0 4 0 1

gene 2 5 6 0.0 0 5 0 1

gene 3 1 7 0.0 0 6 0 1

gene 1 2 7 0.0 0 7 0 1

gene 2 3 7 0.0 0 8 0 1

gene 3 4 7 0.0 0 9 0 1

gene 1 5 7 0.0 0 10 0 1

genomeend 1

////////////////////////////////////////////////////////////////////////////////////////

//Single pole balancing evolution routines ***************************

//Run NEAT::num_runs single pole balancing experiments of at most gens

//generations each and return the population of the last experiment

Population *pole1_test(int gens);

//Score one organism with go_cart, store the result in org->fitness and

//org->winner, and return true when the organism is a winner

bool pole1_evaluate(Organism *org);

//Evaluate and advance pop by one generation; returns 0 when nobody won,

//otherwise (generation-1)*NEAT::pop_size plus the winner's genome id

int pole1_epoch(Population *pop,int generation,char *filename);

//Let net control the simulated cart for up to max_steps steps and return

//the step count reached

int go_cart(Network *net,int max_steps,int thresh); //Run input

//Move the cart and pole

//(updates the four state variables in place for one time step)

void cart_pole(int action, float *x,float *x_dot, float *theta, float *theta_dot);

////////////////////////////////////////////////////////////////////////////////////////

//Perform evolution on single pole balacing, for gens generations

Population *pole1_test(int gens) {

    Population *pop;

    Genome *start_genome;

    char curword[20];

    int id;

    ostringstream *fnamebuf;

    int gen;

    int expcount;

    int status;

    int runs[NEAT::num_runs];

    int totalevals;

    ifstream iFile("pole1startgenes",ios::in);

    cout<<"START SINGLE POLE BALANCING EVOLUTION"<<endl;

    cout<<"Reading in the start genome"<<endl;

    //Read in the start Genome

    iFile>>curword;

    iFile>>id;

    cout<<"Reading in Genome id "<<id<<endl;

    start_genome=new Genome(id,iFile);

    iFile.close();

  

    //Run multiple experiments

    for(expcount=0;expcount<NEAT::num_runs;expcount++) {

      cout<<"EXPERIMENT #"<<expcount<<endl;

      cout<<"Start Genome: "<<start_genome<<endl;

      

      //Spawn the Population

      cout<<"Spawning Population off Genome"<<endl;

      

      pop=new Population(start_genome,NEAT::pop_size);

      

      cout<<"Verifying Spawned Pop"<<endl;

      pop->verify();

      for (gen=1;gen<=gens;gen++) {

cout<<"Generation "<<gen<<endl;


fnamebuf=new ostringstream();

(*fnamebuf)<<"gen_"<<gen<<ends;  //needs end marker

#ifndef NO_SCREEN_OUT

cout<<"name of fname: "<<fnamebuf->str()<<endl;

#endif

char temp[50];

        sprintf (temp, "gen_%d", gen);

status=pole1_epoch(pop,gen,temp);

//status=(pole1_epoch(pop,gen,fnamebuf->str()));


if (status) {

 runs[expcount]=status;

 gen=gens+1;

}


fnamebuf->clear();

delete fnamebuf;


      }

      

    }

    totalevals=0;

    for(expcount=0;expcount<NEAT::num_runs;expcount++) {

      cout<<runs[expcount]<<endl;

      totalevals+=runs[expcount];

    }

    cout<<"Average evals: "<<totalevals/NEAT::num_runs<<endl;

    return pop;

}

int pole1_epoch(Population *pop,int generation,char *filename) {

  vector<Organism*>::iterator curorg;

  vector<Species*>::iterator curspecies;

  //char cfilename[100];

  //strncpy( cfilename, filename.c_str(), 100 );

  //ofstream cfilename(filename.c_str());

  bool win=false;

  int winnernum;

  //Evaluate each organism on a test

  for(curorg=(pop->organisms).begin();curorg!=(pop->organisms).end();++curorg) {

    if (pole1_evaluate(*curorg)) win=true;

  }

  

  //Average and max their fitnesses for dumping to file and snapshot

  for(curspecies=(pop->species).begin();curspecies!=(pop->species).end();++curspecies) {

    //This experiment control routine issues commands to collect ave

    //and max fitness, as opposed to having the snapshot do it, 

    //because this allows flexibility in terms of what time

    //to observe fitnesses at

    (*curspecies)->compute_average_fitness();

    (*curspecies)->compute_max_fitness();

  }

  //Take a snapshot of the population, so that it can be

  //visualized later on

  //if ((generation%1)==0)

  //  pop->snapshot();

  //Only print to file every print_every generations

  if  (win||

       ((generation%(NEAT::print_every))==0))

    pop->print_to_file_by_species(filename);

  if (win) {

    for(curorg=(pop->organisms).begin();curorg!=(pop->organisms).end();++curorg) {

      if ((*curorg)->winner) {

winnernum=((*curorg)->gnome)->genome_id;

cout<<"WINNER IS #"<<((*curorg)->gnome)->genome_id<<endl;

      }

    }    

  }

  //Create the next generation

  pop->epoch(generation);

  if (win) return ((generation-1)*NEAT::pop_size+winnernum);

  else return 0;

}

//Evaluate a single organism on the pole balancing task.
//
//The organism's network is handed to go_cart; the value go_cart returns
//becomes org->fitness.  The organism counts as a winner when that fitness
//is at least MAX_STEPS.  org->winner is set accordingly and the same flag
//is returned.
bool pole1_evaluate(Organism *org) {
  const int MAX_STEPS=100000;  //Step budget given to go_cart

  Network *controller=org->net;

  //Twice the number of genome nodes is passed to go_cart as the limit on
  //visits per activation (for loop detection)
  const int node_count=((org->gnome)->nodes).size();
  const int visit_limit=node_count*2;

  //Try to balance a pole now
  org->fitness = go_cart(controller,MAX_STEPS,visit_limit);

#ifndef NO_SCREEN_OUT
  cout<<"Org "<<(org->gnome)->genome_id<<" fitness: "<<org->fitness<<endl;
#endif

  //Decide if it is a winner
  const bool solved=(org->fitness>=MAX_STEPS);
  org->winner=solved;
  return solved;
}

//     cart_pole() was taken directly from the pole simulator written

//     by Richard Sutton and Charles Anderson.

int go_cart(Network *net,int max_steps,int thresh)

{

   float x, /* cart position, meters */

         x_dot, /* cart velocity */

         theta, /* pole angle, radians */

         theta_dot; /* pole angular velocity */

   int steps=0,y;

   int random_start=1;

   double in[5];  //Input loading array

   double out1;

   double out2;

//     double one_degree= 0.0174532; /* 2pi/360 */

//     double six_degrees=0.1047192;

   double twelve_degrees=0.2094384;

//     double thirty_six_degrees= 0.628329;

//     double fifty_degrees=0.87266;

   vector<NNode*>::iterator out_iter;

   if (random_start) {

     /*set up random start state*/

     x = (lrand48()%4800)/1000.0 - 2.4;

     x_dot = (lrand48()%2000)/1000.0 - 1;

     theta = (lrand48()%400)/1000.0 - .2;

     theta_dot = (lrand48()%3000)/1000.0 - 1.5;

    }

   else 

     x = x_dot = theta = theta_dot = 0.0;

   

   /*--- Iterate through the action-learn loop. ---*/

   while (steps++ < max_steps)

     {

       

       /*-- setup the input layer based on the four iputs --*/

       //setup_input(net,x,x_dot,theta,theta_dot);

       in[0]=1.0;  //Bias

       in[1]=(x + 2.4) / 4.8;;

       in[2]=(x_dot + .75) / 1.5;

       in[3]=(theta + twelve_degrees) / .41;

       in[4]=(theta_dot + 1.0) / 2.0;

       net->load_sensors(in);

       //activate_net(net);   /*-- activate the network based on the input --*/

       //Activate the net

       //If it loops, exit returning only fitness of 1 step

       if (!(net->activate())) return 1;

      /*-- decide which way to push via which output unit is greater --*/

       out_iter=net->outputs.begin();

       out1=(*out_iter)->activation;

       ++out_iter;

       out2=(*out_iter)->activation;

       if (out1 > out2)

y = 0;

       else

y = 1;

       

       /*--- Apply action to the simulated cart-pole ---*/

       cart_pole(y, &x, &x_dot, &theta, &theta_dot);

       

       /*--- Check for failure.  If so, return steps ---*/

       if (x < -2.4 || x > 2.4  || theta < -twelve_degrees ||

  theta > twelve_degrees) 

         return steps;             

     }

   

   return steps;

//     cart_pole() was taken directly from the pole simulator written

//     by Richard Sutton and Charles Anderson.

//     This simulator uses normalized, continuous inputs instead of

//     discretizing the input space.

/*----------------------------------------------------------------------
   cart_pole:  Advances the four state variables of the cart-pole system
   by one time step of TAU seconds, using Euler's method.

   action     push direction: a value > 0 applies +FORCE_MAG, any other
              value applies -FORCE_MAG
   x          cart position, updated in place
   x_dot      cart velocity, updated in place
   theta      pole angle, updated in place
   theta_dot  pole angular velocity, updated in place
----------------------------------------------------------------------*/
void cart_pole(int action, float *x,float *x_dot, float *theta, float *theta_dot) {
  /* Physical parameters of the simulated system */
  const float GRAVITY=9.8;
  const float MASSCART=1.0;
  const float MASSPOLE=0.1;
  const float TOTAL_MASS=(MASSPOLE + MASSCART);
  const float LENGTH=0.5;  /* actually half the pole's length */
  const float POLEMASS_LENGTH=(MASSPOLE * LENGTH);
  const float FORCE_MAG=10.0;
  const float TAU=0.02;  /* seconds between state updates */
  const float FOURTHIRDS=1.3333333333333;

  /* Force applied to the cart for this step */
  float push = -FORCE_MAG;
  if (action>0) {
    push = FORCE_MAG;
  }

  const float cos_angle = cos(*theta);
  const float sin_angle = sin(*theta);

  /* Term shared by both acceleration formulas */
  const float shared = (push + POLEMASS_LENGTH * *theta_dot * *theta_dot * sin_angle)
    / TOTAL_MASS;

  /* Angular acceleration of the pole */
  const float angular_acc = (GRAVITY * sin_angle - cos_angle* shared)
    / (LENGTH * (FOURTHIRDS - MASSPOLE * cos_angle * cos_angle
                 / TOTAL_MASS));

  /* Linear acceleration of the cart */
  const float linear_acc = shared - POLEMASS_LENGTH * angular_acc* cos_angle / TOTAL_MASS;

  /*** Euler step: each position is advanced with the velocity from
       before this step, then the velocity itself is advanced. ***/
  *x  += TAU * *x_dot;
  *x_dot += TAU * linear_acc;
  *theta += TAU * *theta_dot;
  *theta_dot += TAU * angular_acc;
}