Given the following model and indexes, the store contains 6M Parents with about 2 Children for each one.
Map_Reduce_Parent is working quite fast for the amount of data stored, while Map_Reduce_Children is taking days and I do not know when it will finish :D
I guess the SelectMany inside the map/reduce is the real killer. My question is: how does mapping work? What is Raven actually doing in that case?
Thanks
Valerio
/// <summary>
/// Document type the indexes are built over: carries its own attribute
/// and an embedded list of <see cref="Child"/> items.
/// </summary>
public class Parent
{
    /// <summary>Children embedded in this parent.</summary>
    public List<Child> Children { get; set; }

    /// <summary>Attribute value used as the grouping key by Map_Reduce_Parent.</summary>
    public string Attribute_P { get; set; }
}
/// <summary>
/// Item embedded in a parent's Children list.
/// </summary>
public class Child
{
    /// <summary>Attribute value used as the grouping key by Map_Reduce_Children.</summary>
    public string Attribute_C { get; set; }

    /// <summary>Numeric amount attached to this child.</summary>
    public double Amount { get; set; }
}
/// <summary>
/// Shape of one reduced entry produced by the map/reduce indexes:
/// a grouping key plus the aggregated amount and count for that key.
/// </summary>
public class Result
{
    /// <summary>Grouping key of the reduced entry.</summary>
    public string Attribute { get; set; }

    /// <summary>Sum of the amounts of all entries in the group.</summary>
    public double Amount { get; set; }

    /// <summary>Sum of the counts of all entries in the group.</summary>
    public int Count { get; set; }
}
/// <summary>
/// Map/reduce index that fans out every <see cref="Parent"/> into its children and
/// aggregates them by <c>Attribute_C</c>, producing the total amount and the number
/// of children per attribute value.
/// </summary>
public class Map_Reduce_Children : AbstractIndexCreationTask<Parent, Result>
{
    /// <summary>
    /// Defines the map and reduce functions of the index.
    /// </summary>
    public Map_Reduce_Children()
    {
        // One map entry per child. The projection must expose exactly the same
        // fields as the reduce output (Attribute, Amount, Count); Amount was
        // previously missing, so the reduce had nothing to sum.
        Map = docs => from doc in docs
                      from child in doc.Children
                      select new
                      {
                          Attribute = child.Attribute_C,
                          Amount = child.Amount,
                          Count = 1
                      };

        // Collapse all entries sharing the same attribute into a single result.
        Reduce = results => from result in results
                            group result by result.Attribute
                            into g
                            select new
                            {
                                Attribute = g.Key,
                                Amount = g.Sum(r => r.Amount),
                                Count = g.Sum(r => r.Count)
                            };
    }
}
/// <summary>
/// Map/reduce index that aggregates <see cref="Parent"/> documents by
/// <c>Attribute_P</c>, producing the number of parents per attribute value.
/// </summary>
public class Map_Reduce_Parent : AbstractIndexCreationTask<Parent, Result>
{
    /// <summary>
    /// Defines the map and reduce functions of the index.
    /// </summary>
    public Map_Reduce_Parent()
    {
        // One map entry per parent. The projection must expose exactly the same
        // fields as the reduce output (Attribute, Amount, Count). Parent has no
        // amount of its own, so an explicit zero keeps the shapes identical.
        // NOTE(review): if the total of the children's amounts is wanted here,
        // replace 0.0 with a sum over parent.Children - confirm intent.
        Map = docs => from parent in docs
                      select new
                      {
                          Attribute = parent.Attribute_P,
                          Amount = 0.0,
                          Count = 1
                      };

        // Collapse all entries sharing the same attribute into a single result.
        Reduce = results => from result in results
                            group result by result.Attribute
                            into g
                            select new
                            {
                                Attribute = g.Key,
                                Amount = g.Sum(r => r.Amount),
                                Count = g.Sum(r => r.Count)
                            };
    }
}