This is another resource-allocation problem. My goal is to run a query to assign the top-priority job for any time-slot to one of two CPU cores (just an example, so let's assume no interrupts or multi-tasking). Note: this is similar to my earlier post about partitioning, but focuses on overlapping times and assigning multiple items, not just the top-priority item.
Here is our object:
public class Job
{
public int Id;
public int Priority;
public DateTime Begin;
public DateTime End;
}
The real dataset is very large, but for this example, let’s say there are 1000 jobs to be assigned to two CPU cores. They are all loaded into memory, and I need to run a single LINQ to Objects query against them. This is currently taking almost 8 seconds and 1.4 million comparisons.
I have leveraged the logic cited in this post to determine whether two items are overlapping, but unlike that post, I don't simply need to find overlapping items, but to schedule the top item of any overlapping set, and then schedule the next one.
Before I get to the code, let me point out the steps of the current inneficient algorithm:
- Determine the remaining jobs (by removing any already assigned)
- Assign jobs to the current core by self-joining all remaining jobs and selecting the top overlapping job by priority.
- Concatenate the new jobs by executing the query
- Repeat starting at Stop 1 for each remaining core
Questions:
- How can this be done most efficiently?
- Can I avoid the sub-query in step 2? Perhaps by grouping, but I'm not sure how I can group based on the .Overlaps() extension method.
- The jobs are already ordered. Can step 2 leverage that fact that and only compare against items within a short range instead of every single job?
- Is there a more efficient to assign to cores rather than looping? This could avoid executing the query in Step 3? Again, if I could group sets of overlaped jobs, then assigning cores is simply a matter of selecting N per overlaped set.
Full Sample Code:
public class Job
{
public static long Iterations;
public int Id;
public int Priority;
public DateTime Begin;
public DateTime End;
public bool Overlaps(Job other)
{
Iterations++;
return this.End > other.开发者_如何学GoBegin && this.Begin < other.End;
}
}
public class Assignment
{
public Job Job;
public int Core;
}
class Program
{
static void Main(string[] args)
{
const int Jobs = 1000;
const int Cores = 2;
const int ConcurrentJobs = Cores + 1;
const int Priorities = Cores + 3;
DateTime startTime = new DateTime(2011, 3, 1, 0, 0, 0, 0);
Console.WriteLine(string.Format("{0} Jobs x {1} Cores", Jobs, Cores));
var timer = Stopwatch.StartNew();
Console.WriteLine("Populating data");
var jobs = new List<Job>();
for (int jobId = 0; jobId < Jobs; jobId++)
{
var jobStart = startTime.AddHours(jobId / ConcurrentJobs).AddMinutes(jobId % ConcurrentJobs);
jobs.Add(new Job() { Id = jobId, Priority = jobId % Priorities, Begin = jobStart, End = jobStart.AddHours(0.5) });
}
Console.WriteLine(string.Format("Completed in {0:n}ms", timer.ElapsedMilliseconds));
timer.Restart();
Console.WriteLine("Assigning Jobs to Cores");
IEnumerable<Assignment> assignments = null;
for (int core = 0; core < Cores; core++)
{
// avoid modified closures by creating local variables
int localCore = core;
var localAssignments = assignments;
// Step 1: Determine the remaining jobs
var remainingJobs = localAssignments == null ?
jobs :
from j in jobs where !(from a in localAssignments select a.Job).Contains(j) select j;
// Step 2: Assign the top priority job in any time-slot to the core
var assignmentsForCore = from s1 in remainingJobs
where
(from s2 in remainingJobs
where s1.Overlaps(s2)
orderby s2.Priority
select s2).First().Equals(s1)
select new Assignment { Job = s1, Core = localCore };
// Step 3: Accumulate the results (unfortunately requires a .ToList() to avoid massive over-joins)
assignments = assignments == null ? assignmentsForCore.ToList() : assignments.Concat(assignmentsForCore.ToList());
}
// This is where I'd like to Execute the query one single time across all cores, but have to do intermediate steps to avoid massive-over-joins
assignments = assignments.ToList();
Console.WriteLine(string.Format("Completed in {0:n}ms", timer.ElapsedMilliseconds));
Console.WriteLine("\nJobs:");
foreach (var job in jobs.Take(20))
{
Console.WriteLine(string.Format("{0}-{1} Id {2} P{3}", job.Begin, job.End, job.Id, job.Priority));
}
Console.WriteLine("\nAssignments:");
foreach (var assignment in assignments.OrderBy(a => a.Job.Begin).Take(10))
{
Console.WriteLine(string.Format("{0}-{1} Id {2} P{3} C{4}", assignment.Job.Begin, assignment.Job.End, assignment.Job.Id, assignment.Job.Priority, assignment.Core));
}
Console.WriteLine(string.Format("\nTotal Comparisons: {0:n}", Job.Iterations));
Console.WriteLine("Any key to continue");
Console.ReadKey();
}
}
Sample Output:
1000 Jobs x 2 Cores
Populating data Completed in 0.00ms Assigning Jobs to Cores Completed in 7,998.00ms Jobs: 3/1/2011 12:00:00 AM-3/1/2011 12:30:00 AM Id 0 P0 3/1/2011 12:01:00 AM-3/1/2011 12:31:00 AM Id 1 P1 3/1/2011 12:02:00 AM-3/1/2011 12:32:00 AM Id 2 P2 3/1/2011 1:00:00 AM-3/1/2011 1:30:00 AM Id 3 P3 3/1/2011 1:01:00 AM-3/1/2011 1:31:00 AM Id 4 P4 3/1/2011 1:02:00 AM-3/1/2011 1:32:00 AM Id 5 P0 3/1/2011 2:00:00 AM-3/1/2011 2:30:00 AM Id 6 P1 3/1/2011 2:01:00 AM-3/1/2011 2:31:00 AM Id 7 P2 3/1/2011 2:02:00 AM-3/1/2011 2:32:00 AM Id 8 P3 3/1/2011 3:00:00 AM-3/1/2011 3:30:00 AM Id 9 P4 3/1/2011 3:01:00 AM-3/1/2011 3:31:00 AM Id 10 P0 3/1/2011 3:02:00 AM-3/1/2011 3:32:00 AM Id 11 P1 3/1/2011 4:00:00 AM-3/1/2011 4:30:00 AM Id 12 P2 3/1/2011 4:01:00 AM-3/1/2011 4:31:00 AM Id 13 P3 3/1/2011 4:02:00 AM-3/1/2011 4:32:00 AM Id 14 P4 3/1/2011 5:00:00 AM-3/1/2011 5:30:00 AM Id 15 P0 3/1/2011 5:01:00 AM-3/1/2011 5:31:00 AM Id 16 P1 3/1/2011 5:02:00 AM-3/1/2011 5:32:00 AM Id 17 P2 3/1/2011 6:00:00 AM-3/1/2011 6:30:00 AM Id 18 P3 3/1/2011 6:01:00 AM-3/1/2011 6:31:00 AM Id 19 P4 Assignments: 3/1/2011 12:00:00 AM-3/1/2011 12:30:00 AM Id 0 P0 C0 3/1/2011 12:01:00 AM-3/1/2011 12:31:00 AM Id 1 P1 C1 3/1/2011 1:00:00 AM-3/1/2011 1:30:00 AM Id 3 P3 C1 3/1/2011 1:02:00 AM-3/1/2011 1:32:00 AM Id 5 P0 C0 3/1/2011 2:00:00 AM-3/1/2011 2:30:00 AM Id 6 P1 C0 3/1/2011 2:01:00 AM-3/1/2011 2:31:00 AM Id 7 P2 C1 3/1/2011 3:01:00 AM-3/1/2011 3:31:00 AM Id 10 P0 C0 3/1/2011 3:02:00 AM-3/1/2011 3:32:00 AM Id 11 P1 C1 3/1/2011 4:00:00 AM-3/1/2011 4:30:00 AM Id 12 P2 C0 3/1/2011 4:01:00 AM-3/1/2011 4:31:00 AM Id 13 P3 C1 3/1/2011 5:00:00 AM-3/1/2011 5:30:00 AM Id 15 P0 C0 Total Comparisons: 1,443,556.00 Any key to continue
Is there a reason for using linq to object collections for this task? I think that I would create an active list, put all of the jobs in a queue and pop the next one out of the queue whenever the active list dipped below 10 and stick it into the active list. It's easy enough to track which core is executing which task and assign the next task in the queue the the least busy core. Wire up a finished event to the job or just monitor the active list and you'll know when it's appropriate to pop another job off the queue and into the active list.
I would rather do it in a single loop. My produces a different result from yours. Yours scheduled 2/3 of all the jobs. Mine scheduled all. I will add explanations later. Going off for an appointment now.
public class Job
{
public static long Iterations;
public int Id;
public int Priority;
public DateTime Begin;
public DateTime End;
public bool Overlaps(Job other)
{
Iterations++;
return this.End > other.Begin && this.Begin < other.End;
}
}
public class Assignment : IComparable<Assignment>
{
public Job Job;
public int Core;
#region IComparable<Assignment> Members
public int CompareTo(Assignment other)
{
return Job.Begin.CompareTo(other.Job.Begin);
}
#endregion
}
class Program
{
static void Main(string[] args)
{
const int Jobs = 1000;
const int Cores = 2;
const int ConcurrentJobs = Cores + 1;
const int Priorities = Cores + 3;
DateTime startTime = new DateTime(2011, 3, 1, 0, 0, 0, 0);
Console.WriteLine(string.Format("{0} Jobs x {1} Cores", Jobs, Cores));
var timer = Stopwatch.StartNew();
Console.WriteLine("Populating data");
var jobs = new List<Job>();
for (int jobId = 0; jobId < Jobs; jobId++)
{
var jobStart = startTime.AddHours(jobId / ConcurrentJobs).AddMinutes(jobId % ConcurrentJobs);
jobs.Add(new Job() { Id = jobId, Priority = jobId % Priorities, Begin = jobStart, End = jobStart.AddHours(0.5) });
}
Console.WriteLine(string.Format("Completed in {0:n}ms", timer.ElapsedMilliseconds));
timer.Reset();
Console.WriteLine("Assigning Jobs to Cores");
List<Assignment>[] assignments = new List<Assignment>[Cores];
for (int core = 0; core < Cores; core++)
assignments[core] = new List<Assignment>();
Job[] lastJobs = new Job[Cores];
foreach (Job j in jobs)
{
Job job = j;
bool assigned = false;
for (int core = 0; core < Cores; core++)
{
if (lastJobs[core] == null || !lastJobs[core].Overlaps(job))
{
// Assign directly if no last job or no overlap with last job
lastJobs[core] = job;
assignments[core].Add(new Assignment { Job = job, Core = core });
assigned = true;
break;
}
else if (job.Priority > lastJobs[core].Priority)
{
// Overlap and higher priority, so we replace
Job temp = lastJobs[core];
lastJobs[core] = job;
job = temp; // Will try to later assign to other core
assignments[core].Add(new Assignment { Job = job, Core = core });
assigned = true;
break;
}
}
if (!assigned)
{
// TODO: What to do if not assigned? Your code seems to just ignore them
}
}
List<Assignment> merged = new List<Assignment>();
for (int core = 0; core < Cores; core++)
merged.AddRange(assignments[core]);
merged.Sort();
Console.WriteLine(string.Format("Completed in {0:n}ms", timer.ElapsedMilliseconds));
timer.Reset();
Console.WriteLine(string.Format("\nTotal Comparisons: {0:n}", Job.Iterations));
Job.Iterations = 0; // Reset to count again
{
IEnumerable<Assignment> assignments2 = null;
for (int core = 0; core < Cores; core++)
{
// avoid modified closures by creating local variables
int localCore = core;
var localAssignments = assignments2;
// Step 1: Determine the remaining jobs
var remainingJobs = localAssignments == null ?
jobs :
from j in jobs where !(from a in localAssignments select a.Job).Contains(j) select j;
// Step 2: Assign the top priority job in any time-slot to the core
var assignmentsForCore = from s1 in remainingJobs
where
(from s2 in remainingJobs
where s1.Overlaps(s2)
orderby s2.Priority
select s2).First().Equals(s1)
select new Assignment { Job = s1, Core = localCore };
// Step 3: Accumulate the results (unfortunately requires a .ToList() to avoid massive over-joins)
assignments2 = assignments2 == null ? assignmentsForCore.ToList() : assignments2.Concat(assignmentsForCore.ToList());
}
// This is where I'd like to Execute the query one single time across all cores, but have to do intermediate steps to avoid massive-over-joins
assignments2 = assignments2.ToList();
Console.WriteLine(string.Format("Completed in {0:n}ms", timer.ElapsedMilliseconds));
Console.WriteLine("\nJobs:");
foreach (var job in jobs.Take(20))
{
Console.WriteLine(string.Format("{0}-{1} Id {2} P{3}", job.Begin, job.End, job.Id, job.Priority));
}
Console.WriteLine("\nAssignments:");
foreach (var assignment in assignments2.OrderBy(a => a.Job.Begin).Take(10))
{
Console.WriteLine(string.Format("{0}-{1} Id {2} P{3} C{4}", assignment.Job.Begin, assignment.Job.End, assignment.Job.Id, assignment.Job.Priority, assignment.Core));
}
if (merged.Count != assignments2.Count())
System.Console.WriteLine("Difference count {0}, {1}", merged.Count, assignments2.Count());
for (int i = 0; i < merged.Count() && i < assignments2.Count(); i++)
{
var a2 = assignments2.ElementAt(i);
var a = merged[i];
if (a.Job.Id != a2.Job.Id)
System.Console.WriteLine("Difference at {0} {1} {2}", i, a.Job.Begin, a2.Job.Begin);
if (i % 100 == 0) Console.ReadKey();
}
}
Console.WriteLine(string.Format("\nTotal Comparisons: {0:n}", Job.Iterations));
Console.WriteLine("Any key to continue");
Console.ReadKey();
}
}
Removed due to major bug. Reworking on it.. :P
精彩评论