Map Reduce with MongoDB and NORM - a complete example

I’m posting this example because there just weren’t any full, complex examples on the web showing how to use Map Reduce with NORM and MongoDB, and there are some tricks you need to be aware of as you try to get Map Reduce to work through NORM to MongoDB.

The first thing you need to be aware of when using MongoDB with NORM is that using a Guid for your object key is a really bad idea. JavaScript sees it as a binary type and comparisons fail. Use the provided ObjectId type for your objects and all will be well.

The second thing you need to understand is that the result of the reduce operation has a key and a value. You need to deserialize the result of the map reduce into a matching key/value object. The class MapReduceResult<Tid, Tvalue> in the code below will help you construct a suitable type into which to deserialize the results.

In this example we are using Map Reduce to calculate all co-occurrences of two Concepts (think keywords or tags). We have a collection of Articles, each contains several Concepts. We want to find which Concepts occur together most often. We want each Concept to end up with a List of other Concepts that it tends to occur with in a document.

Here’s the code. I omitted the actual entity classes, but it’s fairly easy to see what they are.

[csharp] using System; using System.Collections.Generic; using System.Linq; using System.Text; using log4net; using Norm; using Norm.Responses; using System.Diagnostics; using CompeticsDB; using System.Threading;

namespace WebCrawler
{
    /// <summary>
    /// MapReduce operation to build a co-occurrence matrix.
    /// </summary>
    /// <remarks>
    /// Each Article has a list of Concepts (like keywords) that it contains. We want a co-occurrence or adjacency graph that shows which Concepts
    /// are more likely to occur together in an Article. This allows us to show related Concepts when a user is viewing any particular Concept.
    /// </remarks>
    public class BuildConcurrence
    {
        private static ILog log = LogManager.GetLogger("BuildCooccurrence");

/// <summary>
/// Builds the Concept co-occurrence data: runs a MapReduce over the "Article" collection
/// and stores the resulting pair counts on each Concept.
/// </summary>
/// <remarks>
/// The map step emits one entry per ordered pair of Concepts mentioned by an Article, the
/// reduce step totals those entries per pair, and the totals are written to the permanent
/// "CoOccurrence" collection before being copied onto the Concept documents.
/// </remarks>
public void Execute()
{
    log.Debug("Building co-occurrence matrix");

    using (var db = Mongo.Create(MongoSession.ConnectionString))
    {
        // Examine all Items in the database, for each item examine all the terms that co-occur
        // emit all the pairs, then aggregate them, then store the co-occurrence information back on each Concept

        // First MAP each Article onto the Concept pairs that it includes.
        // Every ordered pair is emitted, so (l, r) and (r, l) both appear, and so does (l, l).
        string map = @"function() {
    for (var i = 0; i < this.ConceptsMentioned.length; i++) {
        var first = this.ConceptsMentioned[i];
        for (var j = 0; j < this.ConceptsMentioned.length; j++) {
            var second = this.ConceptsMentioned[j];
            emit({l: first._id, r: second._id}, 1);
        }
    }
}";

        // Then reduce them by adding up the emitted values for each pair of Concepts.
        // The values must be summed rather than counted: MongoDB may call reduce more than
        // once for the same key, passing earlier reduce results back in as values, so
        // returning values.length would under-count whenever a re-reduce happens.
        string reduce = @"function(key, values) {
    var count = 0;
    for (var i = 0; i < values.length; i++) {
        count += values[i];
    }
    return count;
}";

        MapReduce mr = db.Database.CreateMapReduce();

        // Represents the document passed to the
        // db.runCommand in the shell example
        MapReduceOptions options = new MapReduceOptions()
        {
            CollectionName = "Article",
            Map = map,
            Reduce = reduce,
            Permanant = true, // make the collection permanent
            OutputCollectionName = "CoOccurrence"
        };

        MapReduceResponse response = mr.Execute(options);
        log.Debug("Query = " + options.Query);
        log.Debug(response.Result);
        log.Debug(response.TimeMillis + "ms");
        log.Debug(response.WasSuccessful ? "Success" : "Fail");
        log.Debug("Input " + response.Counts.Input);
        log.Debug("Emit " + response.Counts.Emit);
        log.Debug("Output " + response.Counts.Output);

        // This is how you debug the results of MapReduce - using print()
        // handy to check that your result classes match the actual results
        //
        //var test = db.Database.GetCollection(response.Result).AsQueryable();
        //foreach (var x in test.Take(20))
        //{
        //    log.Debug(print(x));
        //}
        //Thread.Sleep(2000);

        // Deserialize each output document into the key/value shape declared by
        // MapReduceCoOccurrenceResult, highest counts first.
        var resultList = db.Database.GetCollection<MapReduceCoOccurrenceResult>(response.Result)
            .AsQueryable()
            .OrderByDescending(x => x.value);

        // Now we are storing the Co-occurrence information back into the Concepts themselves so that each has
        // a collection of the other Concepts that occur with it and how often that happens
        // NB - only need to do this one way round as we will be provided with both orderings
        // could perhaps change that above so that we only get one ordering of each pair

        // NOTE(review): the type argument was lost in the published listing; Concept is
        // assumed to be the omitted entity class that carries Id and Cooccurrences - confirm.
        var collection = db.GetCollection<Concept>();

        foreach (var x in resultList)
        {
            // Could do this FAR more efficiently!
            var a = collection.AsQueryable().FirstOrDefault(y => y.Id == x._id.l);
            var b = collection.AsQueryable().FirstOrDefault(y => y.Id == x._id.r);

            if (a == null || b == null)
            {
                // This should never happen
                log.Debug("A or B was null for " + x._id.l + " " + x._id.r + " = " + x.value);
                continue;
            }

            // Ensure the list is there
            if (a.Cooccurrences == null)
            {
                a.Cooccurrences = new List<CoOccurrence>();
            }

            // Create the new Co-occurrence object
            CoOccurrence ccb = new CoOccurrence { Count = x.value, Related = x._id.r };

            // Remove the old entry, then insert the new one
            a.Cooccurrences.RemoveAll(cc => cc.Related == ccb.Related);
            a.Cooccurrences.Add(ccb);

            collection.Save(a);
        }
    }
}

/// <summary>
/// This is a useful debug method for inspecting the output of MapReduce when called through NORM
/// </summary>
/// <param name="obj">The value to render; may be an Expando, any other object, or null.</param>
/// <returns>
/// "null" for a null value, "{name=value, ...}" (rendered recursively) for an Expando,
/// and the result of ToString() for anything else.
/// </returns>
private string print(object obj)
{
    // A null value would otherwise throw on ToString() below.
    if (obj == null)
    {
        return "null";
    }

    Norm.BSON.Expando expando = obj as Norm.BSON.Expando;
    if (expando == null)
    {
        return obj.ToString();
    }

    return "{" + string.Join(", ", expando.AllProperties().Select(prop => prop.PropertyName + "=" + print(prop.Value))) + "}";
}

/// <summary>
/// This is a useful type for dealing with MapReduce results.
/// It takes two type parameters, one for the key and one for the value.
/// The simplest possible result would use type parameters ObjectId and int
/// </summary>
/// <typeparam name="Tid">Type of the key produced by the map reduce.</typeparam>
/// <typeparam name="Tvalue">Type of the value produced by the map reduce.</typeparam>
private class MapReduceResult<Tid, Tvalue>
{
    /// <summary>
    /// The key of this result.
    /// </summary>
    public Tid _id { get; set; }

    /// <summary>
    /// The value of this result.
    /// </summary>
    public Tvalue value { get; set; }

    /// <summary>
    /// Renders the result as the key followed by the value, separated by a space.
    /// </summary>
    public override string ToString()
    {
        return string.Format("{0} {1}", _id, value);
    }
}

/// <summary>
/// Key type for this particular map reduce: a pair of Concept ids named l and r.
/// </summary>
private class MapReduceKey
{
    /// <summary>
    /// Id of one of the concepts in the pair
    /// </summary>
    public ObjectId l { get; set; }

    /// <summary>
    /// Id of the other concept in the pair
    /// </summary>
    public ObjectId r { get; set; }
}

/// <summary>
/// Map reduce result, in this case it's a MapReduceKey and an int
/// </summary>
private class MapReduceCoOccurrenceResult : MapReduceResult<MapReduceKey, int>
{
}

} } [/csharp]

Wed Oct 27 2010 06:27:35 GMT-0700 (Pacific Daylight Time)

Next page: What does a Smart House do at Halloween?

Previous page: Exception: An object with the same key already exists in the ObjectStateManager. The ObjectStateManager cannot track multiple objects with the same key.