| 2-Oct-2017 | Like this? Dislike this? Let me know | 
MongoDB is document data model database and as such requires no formal schema for each document in a collection. If you are unfamiliar with these terms, just consider a document like a "row" but with rich structure and a collection as a table. Here is an example of a document as it might appear in the MongoDB command line interface:
    {
      "_id" : ObjectId("59d2465474d1e2d00ebebee9"),
      "device":"D1",
      "created": ISODate("2017-10-02T13:59:48.194Z"),
      "participants": [ "A1","A8","A45" ],
      "data": {
        "color":"red",
        "maxAmps":10,
        "maxVolts":240
      }
    }
There are a number of design benefits of a schema-free document model but for this article we will focus on two:
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebef4"),
      "device" : "D1",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "color" : "red",  "maxAmps" : 10, "maxVolts" : 240 }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebef5"),
      "device" : "D2",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "color" : "green", "maxAmps" : 10, "maxVolts" : 240 }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebef6"),
      "device" : "D3",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "weight" : 12, "maxAmps" : 10, "maxVolts" : 240 }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebef7"),
      "device" : "D3",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "weight" : 12, "maxAmps" : 10, "maxVolts" : 120 }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebef8"),
      "device" : "D4",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "weight" : 12 }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebef9"),
      "device" : "D5",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "maxAmps" : 10, "temp" : 77 }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebefa"),
      "device" : "D6",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "maxAmps" : 10, "temp" : 90, "fish" : "cod" }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebefb"),
      "device" : "D7",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "OS" : "Linux", "language" : "Java" }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebefc"),
      "device" : "D8",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "OS" : "Linux", "language" : "Python" }
    }
    {
      "_id" : ObjectId("59d24a3c74d1e2d00ebebefd"),
      "device" : "D9",
      "ts" : ISODate("2017-10-02T14:16:28.028Z"),
      "data" : { "OS" : "Windows", "language" : "Python" }
    }
    db.foo.aggregate([
      {$group: {_id: null,
          "distinctColor":{$addToSet: "$data.color"},
          "distinctMaxAmps":{$addToSet: "$data.maxAmps"},
          "distinctMaxVolts":{$addToSet: "$data.maxVolts"}  }}
    ]);
    {
      "_id" : null,
      "distinctColor" : [ "green", "red" ],
      "distinctMaxAmps" : [ 10 ],
      "distinctMaxVolts" : [ 120, 240 ]
    }
    // This is the data field:
    > db.foo.aggregate([{$project: {_id:0, z: "$data"}} ]);
    { "z" : { "x" : "A", "y" : 1 } }
    // How do we dynamically fetch keys x and y from "data"?  Use $objectToArray!
    > db.foo.aggregate([{$project: {_id:0, z: {$objectToArray: "$data"}}} ]);
    { "z" : [ { "k" : "x", "v" : "A" }, { "k" : "y", "v" : 1 } ] }
    // ... and going in the other direction.  
    > db.foo.aggregate([{$project: {_id:0, z: "$data2"}} ]);
    { "z" : [ [ "crop", "corn" ], [ "amount", 113 ] ] }
    // $arrayToObject supports two input forms:
    // array of arrays:  [ [fname1,val1], [fname2,val2], ... ]
    // array of k-v objects:  [ {k:"fname1",v:val1}, {k:"fname2",v:val2}, etc. ]
    > db.foo.aggregate([{$project: {_id:0, z: {$arrayToObject: "$data2"}}} ]);
    { "z" : { "crop" : "corn", "amount" : 113 } }
Let's begin to answer our question by first "breaking down" the fields inside the data substructure.
    db.foo.aggregate([
        {$project: {x: {$objectToArray: "$data"}}}
    ]);
    {
      "_id" : ObjectId("59d253b074d1e2d00ebebf2a"),
      "x" : [
        { "k" : "color", "v" : "red" },
        { "k" : "maxAmps", "v" : 10 },
        { "k" : "maxVolts", "v" : 240 }
      ]
    }
    {
      "_id" : ObjectId("59d253b074d1e2d00ebebf2a"),
      "x" : [
        { "k" : "color", "v" : "green" },
        { "k" : "maxAmps", "v" : 10 },
        { "k" : "maxVolts", "v" : 240 }
      ]
    }
    // etc.....
    db.foo.aggregate([
        {$project: {x: {$objectToArray: "$data"}}}
        ,{$unwind: "$x"}
    ]);
    {
        "_id" : ObjectId("59d253b074d1e2d00ebebf2a"),
        "x" : { "k" : "color", "v" : "red" }
    }
    {
        "_id" : ObjectId("59d253b074d1e2d00ebebf2a"),
        "x" : { "k" : "maxAmps", "v" : 10 }
    }
    {
       "_id" : ObjectId("59d253b074d1e2d00ebebf2a"),
        "x" : { "k" : "maxVolts", "v" : 240 }
    }
    {
        "_id" : ObjectId("59d2549f74d1e2d00ebebf35"),
        "x" : { "k" : "color", "v" : "green" }
    }
    // etc.....
    db.foo.aggregate([
        {$project: {x: {$objectToArray: "$data"}}}
        ,{$unwind: "$x"}
        ,{$group: {_id: "$x.k", distinctVals: {$addToSet: "$x.v"}}}
    ]);
    { "_id" : "language", "distinctVals" : [ "Python", "Java" ] }
    { "_id" : "OS", "distinctVals" : [ "Windows", "Linux" ] }
    { "_id" : "temp", "distinctVals" : [ 90, 77 ] }
    { "_id" : "weight", "distinctVals" : [ 12 ] }
    { "_id" : "maxVolts", "distinctVals" : [ 120, 240 ] }
    { "_id" : "fish", "distinctVals" : [ "cod" ] }
    { "_id" : "maxAmps", "distinctVals" : [ 10 ] }
    { "_id" : "color", "distinctVals" : [ "green", "red" ] }
We can use either form of $arrayToObject to turn the RHS of each doc into an LHS:
// For k-v, must have two fields k and v only (no _id or anything else)
,{$project: {_id:0, k: "$_id", v:"$distinctVals"}}  //
// Now take the resulting {k: "fname", v: value}, wrap it in an array
// (note the [] wrapping $$CURRENT) and pass to $arrayToObject:
,{$project: {xx2: {$arrayToObject: [ [ "$$CURRENT"] ] }}}
// For array of array, perhaps a little simpler because not using $$CURRENT:
,{$project: { _id:0, xx: [ [ "$_id", "$distinctVals" ] ] }}
,{$project: {xx2: {$arrayToObject: "$xx"}}}
    db.foo.aggregate([
    {$project: {x: {$objectToArray: "$data"}}}
    ,{$unwind: "$x"}
    ,{$group: {_id: "$x.k", distinctVals: {$addToSet: "$x.v"}}}
    ,{$project: { _id:0, xx: [ [ {$concat: [ "distinct_", "$_id"]}, "$distinctVals" ] ] }}
    ]);
    
    { "xx" : [ [ "distinct_language", [ "Python", "Java" ] ] ] }
    { "xx" : [ [ "distinct_OS", [ "Windows", "Linux" ] ] ] }
    { "xx" : [ [ "distinct_temp", [ 90, 77 ] ] ] }
    { "xx" : [ [ "distinct_weight", [ 12 ] ] ] }
    { "xx" : [ [ "distinct_maxVolts", [ 120, 240 ] ] ] }
    { "xx" : [ [ "distinct_fish", [ "cod" ] ] ] }
    { "xx" : [ [ "distinct_maxAmps", [ 10 ] ] ] }
    { "xx" : [ [ "distinct_color", [ "green", "red" ] ] ] }
    // Note that typically we'd merge the last $project into $replaceRoot for
    // efficiency and a shorter pipeline, e.g.
    //     ,{$replaceRoot: { newRoot: {$arrayToObject: "$xx"}}}
    // but it's expanded out below so you can see what's going on.
    db.foo.aggregate([
      {$project: {x: {$objectToArray: "$data"}}}
      ,{$unwind: "$x"}
      ,{$group: {_id: "$x.k", distinctVals: {$addToSet: "$x.v"}}}
      ,{$project: { _id:0, xx: [ [ {$concat: [ "distinct_", "$_id"]}, "$distinctVals" ] ] }}
      ,{$project: {xx2: {$arrayToObject: "$xx"}}}
      ,{$replaceRoot: { newRoot: "$xx2"}}
			]);
    { "distinct_language" : [ "Python", "Java" ] }
    { "distinct_OS" : [ "Windows", "Linux" ] }
    { "distinct_temp" : [ 90, 77 ] }
    { "distinct_weight" : [ 12 ] }
    { "distinct_maxVolts" : [ 120, 240 ] }
    { "distinct_fish" : [ "cod" ] }
    { "distinct_maxAmps" : [ 10 ] }
    { "distinct_color" : [ "green", "red" ] }
If you understood how $$CURRENT was used in the example above, then you should also realize that you don't necessarily need to start with a substructure (although it's always a really good idea to separate truly dynamic fields into a substructure). If your data looked like this:
    { 
      "_id" : ObjectId("59d2465474d1e2d00ebebee9"),
      "name":"buzz",
      "city":"NYC",
      "favoriteCuisine":"Italian"
    }
    { 
      "_id" : ObjectId("59d2465474d1e2d00ebebeea"),
      "name":"buzz",
      "city":"Miami",
      "favoriteCuisine":"Cuban"
    }
    db.foo.aggregate([
      {$project: {x: {$objectToArray: "$$CURRENT"}}}
      ,{$unwind: "$x"}
      ,{$match: {"x.k": {$ne: "_id"}}}
      // etc....
    // Note!  Holding back _id will NOT work:
    //     {$project: {_id:0, x: {$objectToArray: "$$CURRENT"}}}
    // because $objectToArray will have already operated upon $$CURRENT and
    // {k:"_id", v:ObjectId("...")} is now "buried" in the output array.
Like this? Dislike this? Let me know