#include "scheduler.h"
#include <signal.h>

#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <semaphore.h>
#include <netdb.h>
extern int h_errno;

extern char **environ;

#define MAXDATASIZE 100

// Currently, this function is unused.
void get_net_stats(FILE *fp, unsigned int & ip ,unsigned int & port){
    char *cmd = new char[MAX_SIZE];
    char ip_str[MAX_SIZE],port_str[MAX_SIZE];

    fgets(cmd,MAX_SIZE,fp);

    printf("after fgets\n");
    while(!strstr(cmd,"ccs"))
	fgets(cmd,MAX_SIZE,fp);

    fgets(cmd,MAX_SIZE,fp);

    while(!strstr(cmd,"ccs"))
        fgets(cmd,100,fp);

    cmd = strchr(cmd,'=') + 2;
    sscanf(cmd,"%[0-9]",ip_str);

    cmd = strchr(cmd,'=') + 2;
    sscanf(cmd,"%[0-9]",port_str);

    ip = (unsigned int) strtoul(ip_str,(char**)NULL,10);
    port = (unsigned int) strtoul(port_str,(char**)NULL,10);
    fclose(fp);

    return;
}

// Updates the "STATS" file in the current directory.  The STATS file
// is used by our CGI script to update the web display showing the
// jobs and CPU's being used.  This means the CGI script has to have
// access to this filesystem and file; which is not a good design.
//
// File format: the first line is "<nproc> <njobs>", followed by one
// line per job in the Job List holding argv[5] of the job and the
// number of CPU's allocated to it.  Each job line is also echoed to
// stdout for debugging.  If the file cannot be opened, the error is
// reported and nothing is written.
void Scheduler::dump_stats(){
    // Overwrite it, if it exists.
    FILE * fp = fopen("STATS", "w+");
    if(fp == NULL){
        perror("fopen STATS");
        return;
    }

    fprintf(fp, "%d %d\n", nproc, njobs);

    for(Job *jptr = jlist; jptr != NULL; jptr = jptr->next){
        fprintf(fp, "%s %d\n", jptr->argv[5], jptr->num_allocated_proc);
        printf("######## in loop %s %d \n", jptr->argv[5], jptr->num_allocated_proc);
    }
    fclose(fp);
}

// Query-only admission test: could job j be accepted right now?
// Returns 1 if it could, -1 if it could not.  Nothing is modified;
// no CPU's are handed out here.
int Scheduler::update_bitmap_q(Job *j){
    // A job that needs more CPU's than the machine has never fits.
    if(nproc < j->min_proc)
        return -1;

    // An empty Job List means nothing competes with this job.
    if(jlist == NULL)
        return 1;

    // CPU's currently held by migratable (CHARM) jobs.
    int migratable_in_use = 0;
    // Lower bound on CPU usage: the minimum of every CHARM job plus
    // everything held by MPI jobs.
    int committed = 0;

    for(Job *cur = jlist; cur != NULL; cur = cur->next){
        if(cur->type == CHARM){
            committed += cur->min_proc;
            migratable_in_use += cur->num_allocated_proc;
        }
        else if(cur->type == MPI){
            committed += cur->num_allocated_proc;
        }
    }

    // Every CPU is already spoken for.
    if(committed == nproc)
        return -1;

    // What the migratable jobs could give up, plus what is idle.
    // NOTE(review): committed also counts CPU's held by MPI jobs, so
    // they are subtracted here as well -- confirm this is intended.
    const int reclaimable = (migratable_in_use - committed) + num_free_proc;

    return (reclaimable < j->min_proc) ? -1 : 1;
}

// This function does the bulk of the work in allocating CPU's to a
// new job.  Although this function calculates the new bitmap for
// every migratable job, it does not tell the job to update itself.
//
// Returns 0 when CPU's were assigned to j, -1 when the job cannot be
// accommodated (in which case nothing has been modified).
//
// Allocation proceeds in three stages:
//   1. hand j any free CPU's, up to j->max_proc;
//   2. take one CPU at a time from migratable (CHARM) jobs that hold
//      more than their minimum, until j has its minimum;
//   3. keep taking from migratable jobs that hold at least as many
//      CPU's as j, until j reaches its fair share.
int Scheduler::update_bitmap(Job *j){
    // Number of CPU's in use by migratable jobs.
    int n_migrate_proc = 0;
    // Minimum number needed by migratable jobs, plus everything held
    // by MPI jobs.
    int min_used_proc = 0;
    int n_migrate_jobs = 0;

    if (j->min_proc > nproc)
       return -1;

    // If there are no jobs in the Job List, give this Job all the
    // CPU's it wants, but never index past the CPU's we actually have.
    if(jlist == NULL){
        for(int i=0; i < j->max_proc && i < nproc; i++){
            j->add_proc(i);
            free_proc_vector[i] = 0;
            num_free_proc --;
        }
        return 0;
    }

    // Otherwise, let's find out the lower bound on CPU usage.
    for(Job *jptr = jlist; jptr != NULL; jptr = jptr->next){
        if(jptr->type == CHARM){
            min_used_proc += jptr->min_proc;
            n_migrate_proc += jptr->num_allocated_proc;
            n_migrate_jobs ++;
        }
        else if(jptr->type == MPI)
            min_used_proc += jptr->num_allocated_proc;
    }

    // Sorry, all CPU's are in use.  We cannot accommodate the
    // requested job.
    if(min_used_proc == nproc){
        return -1;
    }

    // Maximum number available, if we minimized the CPU's on
    // migratable jobs.
    // NOTE(review): min_used_proc also counts CPU's held by MPI jobs,
    // so they are subtracted here too -- confirm this is intended.
    //@@wrong max_avail = nproc - min_used_proc + num_free_proc;
    int max_avail = (n_migrate_proc - min_used_proc) + num_free_proc;

    // Fair share: an equal split of the migratable and free CPU's
    // between the existing migratable jobs and the new one.
    int avail = (n_migrate_proc + num_free_proc) / (n_migrate_jobs + 1);
    if(avail > max_avail)
        avail = max_avail;
    // Never push the job past the maximum it asked for.
    if(avail > j->max_proc)
        avail = j->max_proc;

    // Can we make available enough CPU's for this job ?
    if(max_avail < j->min_proc)
        return -1;

    //
    // Ok, now we know we can accommodate this job, so let's do it.
    //

    int num_allocated = 0;

    // First, let's give the job the free CPU's, if any are available.
    if(num_free_proc > 0){
        for(int i=0; i<nproc; i++)
            if((num_allocated < j->max_proc) && free_proc_vector[i]){
                j->add_proc(i);
                free_proc_vector[i] = 0;
                num_free_proc --;
                num_allocated++;
            }
        if(num_allocated == j->max_proc)
            return 0;
    }

    // If the job still needs CPU's, let's reallocate them away from
    // the migratable jobs.
    //
    // The outermost loop is because we might have to go through the
    // Job List several times.  We are trying to be fair by taking
    // away only one CPU from each running job during each iteration.
    while(num_allocated < j->min_proc){
        bool took_one = false;
        // Always step to the next job, whether or not this one could
        // spare a CPU; otherwise the walk never terminates.
        for(Job *jptr = jlist;
            (jptr != NULL) && (num_allocated < j->min_proc);
            jptr = jptr->next){
            if((jptr->type == CHARM) &&
               (jptr->num_allocated_proc > jptr->min_proc)){
                int temp_proc = jptr->delete_proc();
                j->add_proc(temp_proc);
                num_allocated++;
                took_one = true;
            }
        }
        // A full pass that reclaimed nothing cannot make progress on
        // a later pass either.
        if(!took_one)
            break;
    }

    // We've given the new job its min CPU's.  To be fair, we'll now
    // try and give it the average number of CPU's given to migratable
    // jobs.  We'll take away CPU's from migratable jobs which have
    // at least as many CPU's as we do, without breaking their MIN CPU
    // constraint, of course.
    while(num_allocated < avail){
        bool took_one = false;
        for(Job *jptr = jlist;
            (jptr != NULL) && (num_allocated < avail);
            jptr = jptr->next){
            if((jptr->type == CHARM) &&
               (jptr->num_allocated_proc > jptr->min_proc)
               &&(jptr->num_allocated_proc > j->num_allocated_proc - 1)){
                int temp_proc = jptr->delete_proc();
                j->add_proc(temp_proc);
                num_allocated++;
                took_one = true;
            }
        }
        if(!took_one)
            break;
    }

    return 0;
}

/*
void Scheduler::get_bitmap(char *bit_map, int jpos, int new_task_proc){

    int i;
    double start,end,width;

    width = ((double)(nproc))/(njobs);
    start = (jpos - 1)*width;
    end = jpos * width;

    for(i =0; i < nproc; i++){
	if((i >= (int) start) && (i < (int)end))
	    bit_map[i] = 1;
	else
	    bit_map[i] = 0;
    }
}
*/

// Prints the command-line usage summary to stdout.
void default_options(){
    fputs("scheduler <port> <nproc> [<nodelist file>] \n", stdout);
}

// Loads up to nproc host names from node_listfile into node_list.
//
// The file is read as whitespace-separated tokens; every token that
// follows the keyword "host" is taken as a host name.  Reading stops
// once nproc names have been collected or the file ends.
//
// Exits the process with status 1 if the file cannot be opened, if
// it contains no "host" entries, or if it names fewer than nproc
// hosts.
void Scheduler::load_nodelist(char * node_listfile){
    FILE *fp = fopen(node_listfile, "r");
    if(fp == NULL){
        perror(node_listfile);
        exit(1);
    }

    node_list = new char*[nproc];

    int num_nodes = 0;
    // Width-limited reads keep an overlong token from overflowing.
    char token[100];

    while(num_nodes < nproc && fscanf(fp, "%99s", token) == 1){
        if(strcmp(token, "host") != 0)
            continue;

        // The token right after "host" is the host name.
        if(fscanf(fp, "%99s", token) != 1)
            break;

        char *address = new char[100];
        strcpy(address, token);
        node_list[num_nodes++] = address;
    }
    fclose(fp);

    if(num_nodes == 0){
        printf("incorrect scheduler_nodelist file\n");
        exit(1);
    }
    if(num_nodes < nproc){
        printf("all processors not present in nodelist file\n");
        exit(1);
    }
}

// Registers or unregisters this scheduler with the central manager
// listening at dest:port.
//
// reg_unreg == 0 means register, anything else means unregister.
// my_port is the port number reported to the central manager.
//
// Sends "CMRegister <hostname> <my_port>\n" (or "CMUnRegister ...")
// followed by "q\n" over a TCP connection, then closes it.
//
// Returns 1 on success, 0 on any failure (name lookup, socket,
// connect, hostname lookup or write).  SIGPIPE is ignored for the
// duration of the exchange and reset to its default disposition
// afterwards, on both the success and the failure paths.
int
register_with_central_manager(int reg_unreg, int my_port, char *dest, int port)
{
    struct hostent *he = gethostbyname(dest);  /* get the host info */
    if (he == NULL) {
        perror("CMM gethostbyname");
        return 0;
    }

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd == -1) {
        perror("CMM socket");
        return 0;
    }

    struct sockaddr_in their_addr; /* connector's address information */
    memset(&their_addr, 0, sizeof(their_addr));
    their_addr.sin_family = AF_INET;         /* host byte order */
    their_addr.sin_port = htons(port);       /* short, network byte order */
    their_addr.sin_addr = *((struct in_addr *)he->h_addr);

    // SIGPIPE is received if the socket stream connection is broken.
    signal(SIGPIPE, SIG_IGN);

    int ok = 0;
    char my_name[MAXDATASIZE+1];

    if (connect(sockfd, (struct sockaddr *)&their_addr,
                sizeof(struct sockaddr)) == -1) {
        perror("CMM connect");
    }
    else if (gethostname(my_name, MAXDATASIZE) == -1) {
        perror("CMM gethostname");
    }
    else {
        // gethostname() need not terminate a truncated name.
        my_name[MAXDATASIZE] = '\0';

        // Prepare and send the reg/unreg command to the scheduler.
        // Sized for the longest keyword, a full-length host name and
        // any int port, so the message is never truncated.
        char str[MAXDATASIZE + 64];
        snprintf(str, sizeof(str), "%s %s %d\n",
                 (reg_unreg==0)? "CMRegister": "CMUnRegister",
                 my_name, my_port);
        printf("Sending string [%s] to %s:%d\n", str, dest, port);

        if (write(sockfd, str, strlen(str)) < 0
            || write(sockfd, "q\n", 2) < 0) {
            perror("CMM write");
        }
        else {
            ok = 1; // success
        }
    }

    close(sockfd);

    // Go back to default handling of broken streams
    signal(SIGPIPE,SIG_DFL);

    return ok;
}

// flag == 1 means a child has terminated and remove_jobs() should be
// called.  The signal handler sets it; remove_jobs() resets it to 0,
// and the accept loop in start_scheduler() polls it.  Unfortunately,
// race conditions can occur, e.g. what if a job terminates just
// before remove_jobs() resets the flag ? @@
int flag;
// waiting is used by the handler to know whether it can call
// remove_jobs right away.  start_scheduler() sets it to 1 just before
// blocking in accept().  If waiting == 1 the handler calls
// remove_jobs() itself, because we are currently blocked.  Again,
// here a race condition can occur.  However, because a signal
// interrupts execution (and does not allow it to go in parallel), I
// think we should be ok.
int waiting = 0;
// Signal handler for child termination; defined at the end of this
// file.
void  handler(int id);
// Port the scheduler listens on (set in main).  It is also the base
// from which the per-job port is computed: scheduler_port + job_id + 1.
int scheduler_port;
// The single Scheduler instance, published by main so that the
// signal handler can reach it.  0 until the scheduler is constructed.
Scheduler *theScheduler = 0;
//sem_t i_am_running;

// Entry point.
//
// Usage: scheduler <port> <nproc> [<nodelist file>]
//
//   argv[1]  port the scheduler listens on
//   argv[2]  number of CPU's managed by the scheduler (must be > 0)
//   argv[3]  optional nodelist file; defaults to "scheduler_nodelist"
//
// Installs the child-termination handler, builds the Scheduler, loads
// the nodelist and enters the accept loop.
int main(int argc, char ** argv){

//     register_with_central_manager(0, 1999, "bogota.ks.uiuc.edu", 1999);
//     exit(0);

    if(argc < 3){
        default_options();
        exit(1);
    }

    // The CPU count sizes several arrays, so reject nonsense early.
    const int requested_nproc = atoi(argv[2]);
    if(requested_nproc <= 0){
        default_options();
        exit(1);
    }

    // Handle job termination by watching for child processes to exit.
    flag = 0;
    signal(SIGCLD,handler);
//      sem_init(&i_am_running, 0, 1);
//      sem_wait(&i_am_running);

    // signal(SIGINT,handler); SIGINT==2, Control-C
    printf("Starting scheduler.  Use Control-C to exit.\n");

    scheduler_port = atoi(argv[1]);
    Scheduler sch(scheduler_port, requested_nproc);
    theScheduler = &sch;

    if(argc >= 4) {
        // The nodelist file is the third argument; argv[2] is the
        // CPU count.
        printf("Loading nodelist file: %s.\n", argv[3]);
        sch.load_nodelist(argv[3]);
    } else {
        // load_nodelist() takes a non-const char *, so hand it a
        // writable copy rather than a string literal.
        char default_nodelist[] = "scheduler_nodelist";
        printf("Loading nodelist file: scheduler_nodelist.\n");
        sch.load_nodelist(default_nodelist);
    }

    sch.start_scheduler();
    return 0;
}

// Builds a scheduler managing nproc CPU's and binds its listening
// TCP socket to the given port on all local interfaces.  Every CPU
// starts out free and both the Job List and the wait queue are
// empty.  Exits the process with status 1 if the socket cannot be
// created or bound.  listen() is not called here; see
// start_scheduler().
Scheduler::Scheduler(int port, int nproc){

    // Bookkeeping starts from an idle machine.
    job_exited = 0;
    n_wait_jobs = 0;
    jlist = NULL;
    waitq = NULL;
    this->port = port;
    max_jid = 0;
    njobs = 0;
    this->nproc = nproc;

    const int listen_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (listen_fd == -1) {
        perror("socket");
        exit(1);
    }
    this->sch_socfd = listen_fd;

    // Accept connections on any local address.
    struct sockaddr_in local_addr;
    local_addr.sin_family = AF_INET;
    local_addr.sin_port = htons(port);
    local_addr.sin_addr.s_addr = INADDR_ANY;
    memset(&(local_addr.sin_zero), 0, 8);

    // One byte per CPU; non-zero marks the CPU as free.
    free_proc_vector = new char[nproc];
    memset(free_proc_vector, 1, nproc);
    num_free_proc = nproc;

    const int bind_result = bind(listen_fd, (struct sockaddr *)&local_addr,
                                 sizeof(struct sockaddr));
    if (bind_result == -1) {
        perror("bind");
        close(listen_fd);
        exit(1);
    }

}

// The scheduler's main loop; never returns.
//
// Listens on the socket bound by the constructor and, for every
// client connection, reads one request line:
//   - if argv[5] of the parsed request starts with "QUERY", answers
//     "yes" or "no" depending on whether the job could be accepted
//     right now, and closes the connection;
//   - otherwise creates a job with a fresh job id, and either starts
//     it immediately or puts it at the head of the wait queue.
// Before each accept, jobs whose processes have exited are removed if
// the signal handler has raised flag.  The STATS file is refreshed
// after every change.
//
// Connections on which no request line can be read are closed and
// ignored.
void Scheduler::start_scheduler(){
    int client_fd;
    socklen_t sin_size;
    struct sockaddr_in their_addr;
    char cmd[MAX_SIZE];
    FILE * client_fp;

    if (listen(sch_socfd, MAX_CONN) == -1) {
        perror("listen");
        exit(1);
    }

    while(1){ //Accept loop.

        if(flag){
            remove_jobs(); // resets flag to zero
            dump_stats();
        }

        printf("================\nAwaiting faucet request:\n");
        waiting = 1;
        sin_size = sizeof(struct sockaddr_in);
        client_fd = accept(sch_socfd, (struct sockaddr *)&their_addr,
                           &sin_size);
        // We are no longer blocked, whether accept succeeded or was
        // interrupted; the handler must not call remove_jobs() while
        // the loop above may be running it.
        waiting = 0;
        if (client_fd == -1) {
          //	    perror("accept");
            continue;
        }
        printf("Received faucet request:\n");

        client_fp = fdopen(client_fd,"r+");
        if (client_fp == NULL) {
            perror("fdopen");
            close(client_fd);
            continue;
        }

        //	recv(client_fd,cmd,1000,0);

        // Nothing to parse if the client hung up without sending a
        // request line.
        if (fgets(cmd,MAX_SIZE,client_fp) == NULL) {
            fclose(client_fp);
            continue;
        }

        // First parse a scratch copy of the request, so that cmd is
        // still intact for the real job below.
        // NOTE(review): presumably Job's constructor modifies the
        // command buffer it is given -- confirm.
        job_stats jstat;
        jstat.jid = 0;
        jstat.fd = 0;
        char cmd_cpy[MAX_SIZE];
        strncpy(cmd_cpy, cmd, MAX_SIZE);
        jstat.cmd = cmd_cpy;
        jstat.num_system_proc = nproc;
        Job j_q(jstat);
        //printf("==%s==%d\n", j_q.argv[5], strlen(j_q.argv[5]));
        // NOTE(review): assumes every request yields an argv[5] --
        // confirm against Job's parser.
        if (strncmp(j_q.argv[5], "QUERY", 5) == 0) {
           if (update_bitmap_q(&j_q) == 1)
              fputs("yes\n", client_fp);
           else
              fputs("no\n", client_fp);
           fclose(client_fp);
           continue;
        }

        max_jid++;
        njobs++;
        jstat.jid = max_jid;
        jstat.fd = client_fd;
        jstat.cmd = cmd;
        jstat.num_system_proc = nproc;
        Job j(jstat);

        if(schedule_job(j) == -1){
            // No room right now: park a copy at the head of the wait
            // queue.
            Job * new_job = new Job;
            *new_job = j;
            new_job->next = waitq;
            waitq = new_job;
            n_wait_jobs ++;
        }
        else start_job(jlist);

        printf(" after schedule \n");
        dump_stats();
    }
}

// When starting to execute a new non-migratable job, we have to
// create a nodelist file telling it which CPU's to use.
//
// Writes to the file called name (overwriting it) one line per CPU
// whose entry in bitmap is non-zero, containing the matching host
// name from node_list.  bitmap must hold at least nproc entries.  If
// the file cannot be opened, the error is reported and nothing is
// written.
void Scheduler::create_nodes_file(char *name, char *bitmap){
    FILE * fp = fopen(name, "w+");
    if(fp == NULL){
        perror(name);
        return;
    }

    for(int i=0; i < nproc; i++)
        if(bitmap[i])
            fprintf(fp, "%s\n", node_list[i]);

    fclose(fp);
}

// Tries to admit job j.
//
// Returns -1 if the job cannot be accommodated right now (nothing is
// changed in that case), 0 once it has been admitted.  On success:
//   - CPU's have been assigned to the job by update_bitmap();
//   - its command-line arguments have been completed: MPI jobs get
//     the CPU count in argv[2] and a freshly written nodelist file in
//     argv[4]; CHARM jobs get their bitmap-update port in argv[4];
//   - every CHARM job already in the Job List has been told to
//     refresh its bitmap;
//   - a heap copy of the job sits at the head of the Job List, so the
//     caller can start it with start_job(jlist).
//
// j is taken by value; the caller's object is not modified.
int Scheduler::schedule_job(Job j){
    printf("in schedule job %p\n", (void *) jlist);

    // Can we accommodate this job ?
    if(update_bitmap(&j) == -1)
        return -1;

    // We prepare the job's command-line arguments here.  The buffers
    // are large enough for any int, and are handed over to the job's
    // argv.
    if(j.type == MPI){
        // MPI jobs are given the number of CPU's and the nodelist
        // file.
        printf("here\n");
        char *num_proc_string = new char[16];
        snprintf(num_proc_string, 16, "%d", j.num_allocated_proc);
        j.argv[2] = num_proc_string;
        char *nodesfile = new char[32];
        snprintf(nodesfile, 32, "nodelist.%d", j.job_id);
        create_nodes_file(nodesfile, j.bit_map);
        j.argv[4] = nodesfile;
    }
    else if(j.type == CHARM){
        // Migratable jobs are told which port to listen on, for
        // bitmap updates.
        j.argv[4] = new char[16];
        snprintf(j.argv[4], 16, "%d", scheduler_port + j.job_id + 1);
    }

    // Now we inform the migratable jobs that their bitmap might have
    // been changed.
    for(Job *jptr = this->jlist; jptr != NULL; jptr = jptr->next){
        if(jptr->type == CHARM){
            jptr->set_bitmap();
        }
    }

    // Put this job at the head of the Job List
    Job * new_job = new Job;
    *new_job = j;
    new_job->next = jlist;
    jlist = new_job;
    printf("out of schedule job %p\n", (void *) jlist);

    // Callers compare the result against -1, so success must be
    // reported explicitly.
    return 0;
}

// Launches job j as a child process.
//
// The child's stdin, stdout and stderr are connected to the socket of
// the client that requested the job; it then executes "conv-host"
// (CHARM jobs) or "/usr/local/bin/mpirun" (MPI jobs) with the job's
// argv.  A child that cannot exec terminates at once instead of
// carrying on as a second scheduler.
//
// In the parent, the child's pid is recorded in the job; for CHARM
// jobs the scheduler then connects to the job's bitmap-update port
// and sends it its bitmap.  If fork() fails the error is reported,
// j->pid is set to -1 and no connection is attempted.
void Scheduler::start_job(Job *j){
    printf(" in start \n");

    const int childpid = fork();

    if(childpid == -1){
        perror("fork");
        j->pid = childpid;
        return;
    }

    if(childpid == 0){
        // This is the child of the fork, i.e. the new job.

        // Connect stdin, stdout, stderr to the client that requested
        // the job.
        dup2(j->client_soc_fd, 0);
        dup2(j->client_soc_fd, 1);
        dup2(j->client_soc_fd, 2);

        if(j->type == CHARM)
          execv("conv-host", j->argv);
        else if(j->type == MPI)
          execv("/usr/local/bin/mpirun", j->argv);

        // Only reached if nothing was exec'ed.  Leave without
        // flushing the stdio buffers inherited from the parent.
        perror("execv");
        _exit(127);
    }

    // This is the parent of the fork, i.e. the scheduler.

    j->pid = childpid;
    if(j->type == CHARM){
      j->port = scheduler_port + j->job_id + 1;
      j->ip =0;
      j->connect();
      j->set_bitmap();
    }

    return;
}

void Scheduler::remove_jobs(){
    Job *jptr,*prev = NULL;

    printf("In remove Job\n");
    jptr = jlist;
    int status;

    flag = 0;

    while((jlist != NULL) &&
	  (jlist->pid == waitpid(jlist->pid,&status,WNOHANG))){
	jlist = jlist->next;
	num_free_proc += jptr->destroy(free_proc_vector);
	jptr = jlist;
	njobs --;
    }

    prev = jptr;

    while(jptr != NULL){
	if(jptr->pid == waitpid(jptr->pid,&status,WNOHANG)){
	    prev->next = jptr->next;
	    num_free_proc += jptr->destroy(free_proc_vector);
	    njobs --;
	}
	prev = jptr;
	jptr = jptr->next;
    }

    jptr = waitq;
    prev = NULL;
    while(jptr != NULL){
	if(schedule_job(*jptr) != -1){
	    if(prev)
		prev->next = jptr->next;
	    else
		waitq = waitq->next;
	    n_wait_jobs --;
	    start_job(jlist);
	    delete jptr;
	    if(prev)
	      jptr = prev->next;
	    else
	      jptr = waitq;
	}
	else{
	    prev = jptr;
	    jptr = jptr->next;
	}
    }

    if(jlist == NULL)
	return;

    if(num_free_proc == 0)
	return;

    jptr = jlist;
    while((jptr->type != CHARM) && (jptr != NULL) &&
	  (jptr->num_allocated_proc >= jptr->max_proc))
	jptr = jptr->next;

    if(jptr == NULL)
	return;

    int min_pes = jptr->num_allocated_proc;
    Job * min_proc_job = jptr;
    Job * startptr = jptr;

    for(int i=0; i<nproc; i++){
	if(!free_proc_vector[i])
	    continue;
	while(jptr != NULL){
	    if((jptr->type == CHARM) && (jptr->num_allocated_proc < min_pes)
	       && (jptr->num_allocated_proc < jptr->max_proc)){
		min_proc_job = jptr;
		min_pes = jptr->num_allocated_proc;
	    }
	    jptr = jptr->next;
	}

	if(min_proc_job == NULL)
	    break;
	printf("adding processor %d to %d\n", i, min_proc_job->job_id);
	min_proc_job->add_proc(i);
	free_proc_vector[i] = 0;
	num_free_proc --;
	min_pes = nproc + 1;
	min_proc_job = NULL;
	jptr = startptr;
    }

    jptr = jlist;
    while(jptr != NULL){
	if(jptr->type == CHARM)
	    jptr->set_bitmap();
	jptr = jptr->next;
    }
}

// Signal handler installed for child termination.
//
// Records that a child has exited by raising flag.  When the
// scheduler exists and is currently blocked waiting for a request
// (waiting == 1), the finished jobs are removed and the STATS file
// is refreshed straight away; otherwise that is left to the accept
// loop, which polls flag.
void  handler(int id) {
    printf("Signal %d received.  17 means a child has terminated.\n", id);

    // In SVR5 unix, when a signal is raised, its handler gets reset to
    // the default.  So we need to re-install ourselves.
    signal(SIGCLD,handler);

    flag = 1;

    const bool scheduler_is_idle = (theScheduler != 0) && (waiting == 1);
    if (!scheduler_is_idle)
        return;

    theScheduler->remove_jobs(); // resets flag to zero
    theScheduler->dump_stats();
}
