import sys, random
import sim

# Task states stored in a controller's work queue (wq).  Non-negative wq
# entries are the host id of the controller that allocated the task.
TASK_UNDONE    = -1
TASK_REMOTE    = -2
TASK_COMPLETED = -3

# RON type used (default is direct internet routing)
RON = "0"

# Number of seconds for worker lease (not used since workers don't crash)
WORKER_LEASE = 60

# Number of seconds for controller leases
CONTROLLER_LEASE = 10*60

# Number of seconds between each controller state update
PMAX = 50

# Length of the dry-run period in seconds.  The command-line entry point
# overwrites this with the value given on the command line; the default only
# exists so that Controller and Driver do not fail with a NameError when this
# module is imported instead of being run as a script.
DRY_RUN = 0.0

# Some sanity checks on the variables
assert PMAX < CONTROLLER_LEASE, "PMAX must be less than CONTROLLER_LEASE"
assert RON in ['0', '1', '2'], "RON type must be either '0', '1', or '2'"


## Simple log function for output to a file
def log(msg, file=sys.stdout):
    """Write msg verbatim to file and flush it right away.

    No newline is appended.  `file` is any object offering write() and
    flush(); it defaults to the sys.stdout that was current when this
    function was defined.
    """
    stream = file
    stream.write(msg)
    stream.flush()


class Controller:
    """One simulated controller handing tasks to a fixed number of workers.

    State kept per controller:
      hosts, host_id   -- list of all controller names and this one's index
      wq               -- per-task state: TASK_UNDONE, TASK_REMOTE,
                          TASK_COMPLETED, or the host id that allocated it
      timeouts         -- per-task timeout value (absolute simulation time)
      tasklist         -- task id -> completion time for locally running tasks
      completed        -- task ids completed locally since last reset
      allocated        -- (task id, host id) pairs allocated since last reset
      s1, s2           -- half-open range [s1, s2) of this controller's
                          initial local task set
      pmax             -- time of this controller's next periodic update
      next_completion  -- time the earliest local task finishes, or None
      leases           -- other controller name -> last time heard from
    """

    def __init__(self, name, num_workers, tasks, hosts, current_time):
        """Set up the work queue and the initial local task range.

        name         -- this controller's entry in `hosts`
        num_workers  -- number of tasks that may run locally at once
        tasks        -- sequence of task durations (anything float() accepts)
        hosts        -- list of all controller names
        current_time -- simulation time at start-up
        """
        self.hosts = hosts
        self.host_id = self.hosts.index(name)
        self.num_workers = num_workers
        self.num_tasks = len(tasks)
        self.task_duration = tasks
        self.wq = [TASK_UNDONE]*self.num_tasks
        self.timeouts = [0.0]*self.num_tasks
        self.tasklist = {}
        self.completed = []
        self.allocated = []
        self.remaining_tasks = self.num_tasks
        self.next_completion = 0.0
        self.current_time = current_time
        # Random offset so that controllers do not all update at the same time
        self.pmax = current_time + random.uniform(0, 1) + PMAX
        self.num_allocated = 0
        self.terminated = 0
        # Initialize controller lease values to DRY_RUN
        self.leases = {}
        for h in hosts:
            if h != name:
                self.leases[h] = DRY_RUN
        # Initialize local set of tasks
        self.init_schedule()
        # Print banner (single-argument parenthesised print behaves the same
        # as a Python 2 print statement and as the Python 3 function)
        print('Controller %d starts with task range %d : %d' %
              (self.host_id, self.s1, self.s2))

    def init_schedule(self):
        """Choose the local task range [s1, s2) and set the remote timeouts."""
        num_hosts = len(self.hosts)
        if self.num_tasks < num_hosts*self.num_workers:
            # Less tasks than total number of workers: each controller gets
            # at most num_workers tasks, later controllers may get none
            start = self.host_id * self.num_workers
            if start < self.num_tasks:
                self.s1 = start
                self.s2 = min(start + self.num_workers, self.num_tasks)
            else:
                self.s1 = self.s2 = 0
        else:
            # More tasks than total number of workers: equal shares, with the
            # last controller also taking the remainder.  Floor division is
            # spelled out so the bounds stay integers on any Python version.
            share = self.num_tasks // num_hosts
            self.s1 = share * self.host_id
            self.s2 = share * (self.host_id + 1)
            if self.host_id == num_hosts - 1:
                self.s2 = self.num_tasks
        # Set timeouts for the initial distribution
        self.set_initial_timeouts()

    def set_initial_timeouts(self):
        """Mark every task outside [s1, s2) as TASK_REMOTE with a timeout."""
        for t_id in range(self.num_tasks):
            if t_id < self.s1 or t_id >= self.s2:
                timeout = self.current_time + \
                          float(self.task_duration[t_id]) + \
                          random.uniform(0, 0.5) + WORKER_LEASE
                self.timeouts[t_id] = timeout
                self.wq[t_id] = TASK_REMOTE

    def pick_task(self):
        """Allocate one TASK_UNDONE task to a local worker.

        The local range is searched first, then the whole work queue.
        Returns 1 if a task was allocated and 0 if no undone task exists.
        """
        local = self.wq[self.s1:self.s2]
        try:
            t_id = local.index(TASK_UNDONE)
        except ValueError:
            try:
                t_id = self.wq.index(TASK_UNDONE)
            except ValueError:
                return 0
        else:
            # Index was relative to the local slice; make it absolute
            t_id += self.s1
        self.tasklist[t_id] = self.current_time + float(self.task_duration[t_id]) + \
                              random.uniform(0, 0.1)
        # Set worker lease (not used, but in there anyway)
        self.timeouts[t_id] = self.tasklist[t_id] + WORKER_LEASE
        self.wq[t_id] = self.host_id
        self.allocated.append((t_id, self.host_id))
        self.num_allocated += 1
        return 1

    def find_next_task_completion(self):
        """Set next_completion to the earliest completion time, or None."""
        if self.tasklist:
            self.next_completion = min(self.tasklist.values())
        else:
            self.next_completion = None

    def complete_task(self):
        """Retire the one running task whose completion time is next_completion.

        The task is removed from tasklist and appended to `completed`;
        remaining_tasks is only decremented if the task was not already
        marked TASK_COMPLETED in the work queue.
        """
        for t_id in self.tasklist:
            if self.tasklist[t_id] == self.next_completion:
                break
        else:
            # No running task matches; nothing to do
            return
        self.completed.append(t_id)
        del self.tasklist[t_id]
        if self.wq[t_id] != TASK_COMPLETED:
            self.wq[t_id] = TASK_COMPLETED
            self.remaining_tasks -= 1

    def run_controller(self):
        """Handle the event at current_time and refill idle workers.

        current_time is compared for exact equality with pmax and
        next_completion, so the caller must set it to exactly one of those
        values for the corresponding event to be recognised.
        """
        # Check if this is just a multicast update
        if self.current_time == self.pmax:
            # Set the next timeout for a multicast
            self.pmax += PMAX
        # ... or check if a task has completed
        elif self.current_time == self.next_completion:
            self.complete_task()
        # Return if there are no more remaining tasks
        if self.remaining_tasks <= 0:
            return
        # Allocate tasks while there are idle workers and undone tasks
        while len(self.tasklist) < self.num_workers:
            if self.pick_task() != 1:
                break
        # Find when the next local task completes
        self.find_next_task_completion()


class Driver:
    """Event loop that steps the topology and the controllers through time.

    The driver repeatedly finds the earliest pending event (a task completion
    or a periodic pmax update) over all controllers, advances the topology
    clock to it, runs that controller, multicasts its state to the other
    controllers over links that are up, and expires controller leases.
    """

    def __init__(self, ronfile, taskfile, hostfile, num_workers, pmax_only, ron_type=RON):
        """Build the topology and one Controller per host.

        ronfile     -- handed unchanged to sim.Topology
        taskfile    -- open file, one task duration per line
        hostfile    -- open file, one controller name per line
        num_workers -- workers per controller
        pmax_only   -- if true, multicast only at pmax events
        ron_type    -- RON sample type handed to sim.Topology
        """
        self.topology = sim.Topology(ronfile, ron_type)
        self.topology.advance(DRY_RUN)
        tasks = taskfile.readlines()
        # Drop empty lines; a real list is needed since hosts is indexed
        # and measured later on
        self.hosts = [h for h in hostfile.read().split('\n') if h]
        self.num_tasks = len(tasks)
        self.num_workers = num_workers
        self.ron_type = ron_type
        self.controllers = {}
        self.completed = {}
        self.allocated = {}
        self.pmax_only = pmax_only
        # Instantiate controllers
        for host in self.hosts:
            self.controllers[host] = Controller(host, num_workers, tasks, self.hosts,
                                                self.topology.clock)
        # Print banner and startup information
        print('')
        print('%d controllers, %d workers per controller, %d tasks' %
              (len(self.hosts), num_workers, self.num_tasks))
        print('RON sample type: %s' % (ron_type,))
        print('Dry-run period: %d secs' % DRY_RUN)
        print('Controller lease timeout: %d secs' % CONTROLLER_LEASE)
        print('PMAX: %d secs' % PMAX)
        if self.pmax_only:
            print('Only sending broadcast at PMAX intervals')
        else:
            print('Sending broadcast at task completion and PMAX intervals')
        print('')
        print('Starting simulation, this may take a while...')

    def _multicast_count(self, send_failure, send_success):
        """Return the number of multicasts implied by the per-link send counts.

        Every multicast addresses len(hosts)-1 peers.  With a single
        controller there are no peers (and no sends), so 0 is returned
        instead of dividing by zero.
        """
        peers = len(self.hosts) - 1
        if peers <= 0:
            return 0
        return (send_failure + send_success) // peers

    def finish(self, con, send_failure, send_success):
        """Print the result table row and the human-readable summary.

        con          -- name of the controller that finished last
        send_failure -- number of host to host sends over a link that was down
        send_success -- number of host to host sends that were delivered
        """
        total_tasks = 0
        for host in self.controllers:
            total_tasks += self.controllers[host].num_allocated
        elapsed = int(self.topology.clock-DRY_RUN)
        # Allocations beyond one per task are redundant work
        redundant = total_tasks - self.num_tasks
        multicasts = self._multicast_count(send_failure, send_success)
        print('#T\t#C\t#W\tRON\t#Red\tTime\t#Sf\t#Ss\t#Bc')
        log("%d\t%d\t%d\t%d\t" % (self.num_tasks, len(self.hosts), self.num_workers,
                                      int(self.ron_type)))
        log("%d\t%d\t%d\t%d\t%d\n" % (redundant, elapsed, send_failure,
                                      send_success, multicasts))
        print('Computation finished at controller %s' % (self.controllers[con].host_id,))
        print('Elapsed time (in seconds): %s' % (elapsed,))
        print('Redundant tasks: %s' % (redundant,))
        print('Host to host send failures: %s' % (send_failure,))
        print('Host to host send successes: %s' % (send_success,))
        print('Total number of multicasts: %s' % (multicasts,))

    def update_state(self, remote, local):
        """Merge the state known by controller `local` into controller `remote`."""
        src = self.controllers[local]
        dst = self.controllers[remote]
        # Update completed tasks on remote host
        for t_id in self.completed[local]:
            if dst.wq[t_id] != TASK_COMPLETED:
                dst.wq[t_id] = TASK_COMPLETED
                dst.remaining_tasks -= 1
                if t_id not in self.completed[remote]:
                    self.completed[remote][t_id] = None
        # Update allocated tasks on remote host; only tasks the remote host
        # still regards as undone or remote are overwritten
        for t_id in self.allocated[local]:
            e = dst.wq[t_id]
            if e == TASK_UNDONE or e == TASK_REMOTE:
                dst.wq[t_id] = self.allocated[local][t_id]
                dst.timeouts[t_id] = src.timeouts[t_id]
                if t_id not in self.allocated[remote]:
                    self.allocated[remote][t_id] = self.allocated[local][t_id]
        # Update the controller leases: the sender was just heard from, and
        # any fresher third-party lease the sender knows of is adopted
        dst.leases[local] = src.current_time
        for c in self.controllers:
            if c != local and c != remote:
                if dst.leases[c] < src.leases[c]:
                    dst.leases[c] = src.leases[c]

    def _next_event(self, con, state_update):
        """Find the earliest pending event over all controllers.

        Returns (time, controller name, state_update flag).  Task completions
        are only considered for controllers that have not terminated; pmax
        updates are considered for all of them.  If no event lies before the
        2**30 horizon, the `con` and `state_update` passed in are returned
        unchanged together with the horizon.
        """
        when = 2**30
        for c in self.controllers:
            ctrl = self.controllers[c]
            if not ctrl.terminated:
                next_completion = ctrl.next_completion
                if next_completion is not None and next_completion < when:
                    when = next_completion
                    state_update = 0
                    con = c
            if ctrl.pmax < when:
                when = ctrl.pmax
                state_update = 1
                con = c
        return when, con, state_update

    def run(self):
        """Run the simulation until all controllers finish or the trace ends."""
        send_failure = 0
        send_success = 0
        con = None
        state_update = 0
        num_alive = len(self.controllers)
        progress = 0

        # Do initial run so every controller allocates its first tasks, then
        # find whether a task completion or a state update comes first
        for c in self.controllers:
            self.completed[c] = {}
            self.allocated[c] = {}
            self.controllers[c].run_controller()
            for tid, hid in self.controllers[c].allocated:
                self.allocated[c][tid] = hid
            self.controllers[c].allocated = []
        timeslice, con, state_update = self._next_event(con, state_update)
        timeslice -= self.topology.clock

        # Assert that at least one task has been allocated by a controller
        assert con is not None, "No task or state update was allocated"

        while 1:
            # Advance topology until the next event; a return value of 0
            # from advance() ends the simulation
            if self.topology.advance(timeslice) == 0:
                return

            # Run controller logic and collect the completed and allocated tasks
            ctrl = self.controllers[con]
            ctrl.current_time = self.topology.clock
            ctrl.run_controller()
            for c in ctrl.completed:
                self.completed[con][c] = None
            for tid, hid in ctrl.allocated:
                self.allocated[con][tid] = hid
            # Completed and allocated tasks for this controller is now reset
            ctrl.completed = []
            ctrl.allocated = []

            # Multicast state from controller to other controllers at regular
            # intervals (pmax) and, unless pmax_only is set, at every event
            if state_update or not self.pmax_only:
                for remote_con in self.hosts:
                    if remote_con != con:
                        if self.topology.link(con, remote_con) == sim.LINK_UP:
                            self.update_state(remote_con, con)
                            send_success += 1
                        else:
                            send_failure += 1

            # Check for leases that have expired and mark tasks in that
            # controller's initial range as undone if they are still remote
            # or were allocated by it
            t = ctrl.current_time
            for c in ctrl.leases:
                if t - ctrl.leases[c] > CONTROLLER_LEASE:
                    peer = self.controllers[c]
                    for i in range(peer.s1, peer.s2):
                        e = ctrl.wq[i]
                        if e == TASK_REMOTE or e == peer.host_id:
                            ctrl.wq[i] = TASK_UNDONE

            # Check if the computation has completed
            for c in self.controllers:
                if self.controllers[c].remaining_tasks == 0 and \
                   not self.controllers[c].terminated:
                    self.controllers[c].terminated = 1
                    print('Controller %d completed at time %.2f, %d left' %
                          (self.controllers[c].host_id, self.controllers[con].current_time,
                           num_alive-1))
                    num_alive -= 1
                    if num_alive == 0:
                        self.finish(c, send_failure, send_success)
                        return

            # Find when the next event happens and continue loop
            timeslice, con, state_update = self._next_event(con, state_update)
            timeslice -= self.topology.clock

            # Print progress every 50 seconds in the trace (uncomment to activate)
            #if progress + 50 < self.topology.clock:
            #    progress = self.topology.clock
            #    print(progress)
            #    for c in self.controllers:
            #        t = self.controllers[c]
            #        print('Controller %s %s %s' % (t.host_id, t.remaining_tasks, len(t.tasklist)))
            #    print('=' * 70)
            #    print '=' * 70


## Commandline interface
if __name__=="__main__":
    try:
        # Just ignore this, it's an optimizer you can find at psyco.sf.net
        import psyco
        psyco.bind(Controller, 10)
        psyco.bind(Driver, 10)
    except Exception:
        # psyco is optional: run unoptimized if it is missing or unusable,
        # but do not swallow KeyboardInterrupt or SystemExit
        pass
    # Exactly six arguments are unpacked below, so reject both too few and
    # too many instead of failing later with an unpacking error
    if len(sys.argv) != 7:
        print("Usage: python algo3.py <tracefile> <taskfile> <hostfile> <# of workers> <dry run> <pmax only>")
        raise SystemExit
    ron_file, task_file, host_file, num_w, dryrun, pmax_only = sys.argv[1:]
    # Module-level dry-run length read by Controller and Driver
    DRY_RUN = float(dryrun)
    handles = []
    try:
        for path in (ron_file, task_file, host_file):
            handles.append(open(path))
        d = Driver(handles[0], handles[1], handles[2], int(num_w), int(pmax_only))
        d.run()
    finally:
        # Close whatever was opened, also when the simulation fails
        for fh in handles:
            fh.close()
