27-Sep-2018 | Like this? Dislike this? Let me know |
A Microsoft Excel Pivot table is as much -- if not more -- of a GUI control interface as it is a dimensional data aggregation engine. It's very easy to move and format rows and columns to present the rolled up data just the way you want.
MongoDB, of course, is just a database -- no specialized "pivoting GUI." But very often we want to extract the same kind of data from the database, and to do so efficiently. This article demonstrates the use of the powerful $facet operator and less than 60 lines of javascript to programmatically create an aggregation framework pipeline that does the heavy lifting to yield pivot-table-esque output.
{region: "A", state: "MA", year: 2001, q1:1, q2:2, q3:3, q4:4} ,{region: "A", state: "NH", year: 2001, q1:6, q2:2, q3:3, q4:4} ,{region: "A", state: "CT", year: 2001, q1:1, q2:2, q3:3, q4:4} ,{region: "B", state: "NY", year: 2001, q1:8, q2:2, q3:3, q4:4} ,{region: "B", state: "NJ", year: 2001, q1:1, q2:2, q3:3, q4:4} ,{region: "A", state: "MA", year: 2002, q1:52, q2:32, q3:43, q4:54} ,{region: "A", state: "NH", year: 2002, q1:21, q2:32, q3:43, q4:54} ,{region: "A", state: "CT", year: 2002, q1:67, q2:32, q3:43, q4:54} ,{region: "B", state: "NY", year: 2002, q1:8, q2:32, q3:43, q4:54} ,{region: "B", state: "NJ", year: 2002, q1:9, q2:32, q3:43, q4:54}
db.collection.aggregate([ {$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ]} { "_id" : "B", "tot_q1" : 26, "tot_q2" : 68 } { "_id" : "A", "tot_q1" : 148, "tot_q2" : 102 } db.collection.aggregate([ {$group: {_id: "$state", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ]} { "_id" : "NJ", "tot_q1" : 10, "tot_q2" : 34 } { "_id" : "CT", "tot_q1" : 68, "tot_q2" : 34 } { "_id" : "NH", "tot_q1" : 27, "tot_q2" : 34 } { "_id" : "NY", "tot_q1" : 16, "tot_q2" : 34 } { "_id" : "MA", "tot_q1" : 53, "tot_q2" : 34 }
db.collection.aggregate([ {$facet: { "byRegion": [ {$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], "byState": [ {$group: {_id: "$state", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ] }} ]); { "byRegion" : [ {"_id" : "B","tot_q1" : 26,"tot_q2" : 68}, {"_id" : "A","tot_q1" : 148,"tot_q2" : 102} ], "byState" : [ {"_id" : "NJ","tot_q1" : 10,"tot_q2" : 34}, {"_id" : "CT","tot_q1" : 68,"tot_q2" : 34}, {"_id" : "NH","tot_q1" : 27,"tot_q2" : 34}, {"_id" : "NY","tot_q1" : 16,"tot_q2" : 34}, {"_id" : "MA","tot_q1" : 53,"tot_q2" : 34} ] }
db.collection.aggregate([ {$match: {"year": 2002}}, // only get 2002 {$facet: { "byRegion": [ {$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], "byState": [ {$group: {_id: "$state", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ] }} ]); { "byRegion" : [ {"_id" : "B","tot_q1" : 17,"tot_q2" : 64}, {"_id" : "A","tot_q1" : 140,"tot_q2" : 96} ], "byState" : [ {"_id" : "NJ","tot_q1" : 9,"tot_q2" : 32}, {"_id" : "CT","tot_q1" : 67,"tot_q2" : 32}, {"_id" : "NH","tot_q1" : 21,"tot_q2" : 32}, {"_id" : "NY","tot_q1" : 8,"tot_q2" : 32}, {"_id" : "MA","tot_q1" : 52,"tot_q2" : 32} ] }
db.collection.aggregate([ {$facet: { "byRegion": [ {$group: {_id: "$region", tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], "byRegionYear": [ {$group: {_id: {region:"$region",year:"$year"}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], "byRegionState": [ {$group: {_id: {region:"$region",year:"$state"}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], //... }} ]);
db.collection.aggregate([ {$facet: { "byRegion": [ {$group: {_id: "$region", x: {$push: {q1:"$q1", q2:"$q2", state:"$state",year:"$year"}}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], "byRegionYear": [ {$group: {_id: {region:"$region",year:"$year"}, x: {$push: {q1:"$q1", q2:"$q2", state:"$state"}}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], "byRegionState": [ {$group: {_id: {region:"$region",year:"$state"}, x: {$push: {q1:"$q1", q2:"$q2", year:"$year"}}, tot_q1: {$sum:"$q1"}, tot_q2: {$sum:"$q2"}} } ], //... }} ]); "byRegion" : [ { "_id" : "B", "x" : [ {"state" : "NY","year" : 2001,"q1" : 8,"q2" : 2 }, {"state" : "NJ","year" : 2001,"q1" : 1,"q2" : 2 }, {"state" : "NY","year" : 2002,"q1" : 8,"q2" : 32}, {"state" : "NJ","year" : 2002,"q1" : 9,"q2" : 32} ], "tot_q1" : 26, "tot_q2" : 68 }, // ...
function getCombinations(items) { var result = []; var f = function(keyarr, items, lvl) { for (var i = 0; i < items.length; i++) { var nka = keyarr.concat(items[i]); result.push(nka); f(nka, items.slice(i + 1), lvl+1); } } f([], items, 0); return result; } function createPivotFacets(dims, measures) { var fcts = {}; // This is where we build the $project stage to turn // sum_q1: nnn // avg_q1: nnn // max_q1: nnn // into // q1: { sum:nnn, avg:nnn, ... } // Only need to do this once; the $project is the same for all // variations of $group. var prj = {_id:1, _x:1, _n:1}; measures.forEach(function(m) { var z = {}; ["sum","avg","max","min"].forEach(function(f) { z[f] = "$"+f+"_"+m; prj[m] = z; }) }); var combinations = getCombinations(dims); for (var i = 0; i < combinations.length; i++) { var karr = combinations[i]; var parr = dims.filter(function(i) {return karr.indexOf(i) < 0;}); var grp = {}; var idd = {}; karr.forEach(function(k) { idd[k] = "$" + k; }); grp['_id'] = idd; if(parr.length > 0) { var pdd = {}; measures.forEach(function(m) { pdd[m] = "$"+m }); parr.forEach(function(k) { pdd[k] = "$" + k; }); grp['_x'] = {"$push": pdd}; } grp['_n'] = {"$sum":1}; // handy but basically {$size: "$_x"} measures.forEach(function(m) { ["sum","avg","max","min"].forEach(function(f) { var z = {}; z["$"+f] = "$"+m; grp[f+"_"+m] = z; }); }); fcts[karr.join(':')] = [ {$group:grp}, {$project:prj} ]; } return {"$facet":fcts}; } pipe = [ createPivotFacets(["region","year","state"], ["q1","q2","q3"]) ]; db.collection.aggregate(pipe); { "region" : [ { "_id" : { "region" : "B" }, "_x" : [ {"q1" : 8,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "NY"}, {"q1" : 1,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "NJ"}, {"q1" : 8,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "NY"}, {"q1" : 9,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "NJ"} ], "_n" : 4, "q1" : {"sum" : 26,"avg" : 6.5,"max" : 9,"min" : 1}, "q2" : {"sum" : 68,"avg" : 17,"max" : 32,"min" : 2}, "q3" : {"sum" : 92,"avg" : 23,"max" : 43,"min" : 3} }, { "_id" : { "region" : "A" }, "_x" : [ {"q1" : 1,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "MA"}, {"q1" : 6,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "NH"}, {"q1" : 1,"q2" : 2,"q3" : 3,"year" : 2001,"state" : "CT"}, {"q1" : 52,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "MA"}, {"q1" : 21,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "NH"}, {"q1" : 67,"q2" : 32,"q3" : 43,"year" : 2002,"state" : "CT"} ], "_n" : 6, "q1" : {"sum" : 148,"avg" : 24.666666666666668,"max" : 67,"min" : 1}, "q2" : {"sum" : 102,"avg" : 17,"max" : 32,"min" : 2}, "q3" : {"sum" : 138,"avg" : 23,"max" : 43,"min" : 3} } ], "region:year" : [ { "_id" : { "region" : "B", "year" : 2002 }, "_x" : [ {"q1" : 8,"q2" : 32,"q3" : 43,"state" : "NY"}, {"q1" : 9,"q2" : 32,"q3" : 43,"state" : "NJ"} ], "_n" : 2, "q1" : {"sum" : 17,"avg" : 8.5,"max" : 9,"min" : 8}, "q2" : {"sum" : 64,"avg" : 32,"max" : 32,"min" : 32}, "q3" : {"sum" : 86,"avg" : 43,"max" : 43,"min" : 43} }, { "_id" : { "region" : "A", "year" : 2002 }, "_x" : [ {"q1" : 52,"q2" : 32,"q3" : 43,"state" : "MA"}, {"q1" : 21,"q2" : 32,"q3" : 43,"state" : "NH"}, {"q1" : 67,"q2" : 32,"q3" : 43,"state" : "CT"} ], "_n" : 3, "q1" : {"sum" : 140,"avg" : 46.666666666666664,"max" : 67,"min" : 21}, "q2" : {"sum" : 96,"avg" : 32,"max" : 32,"min" : 32}, "q3" : {"sum" : 129,"avg" : 43,"max" : 43,"min" : 43} }, // ...
There is room for improvement here but it is left as an exercise to the reader to experiment. Suggestions:
Like this? Dislike this? Let me know