"""Word-count example: count how often each word occurs in a text file.

Runs a map/reduce job through the ``discotest`` harness and prints one
``word frequency`` line per distinct word.
"""

# NOTE(review): MapReduceJob and result_iterator are presumably supplied by
# this wildcard import -- confirm against discotest before making it explicit.
from discotest import *


def fun_map(e, params):
    """Map one input entry to a list of ``(word, 1)`` pairs.

    ``e`` is split on whitespace; every resulting token yields one pair.
    ``params`` is accepted for the framework's calling convention and is
    not used.
    """
    return [(w, 1) for w in e.split()]


def fun_reduce(iter, out, params):
    """Sum the counts per word and emit each total through ``out.add``.

    ``iter`` yields ``(word, count)`` pairs; ``count`` is passed through
    ``int()`` before being added.  ``out`` must provide
    ``add(word, total)``.  ``params`` is accepted for the framework's
    calling convention and is not used.
    """
    # Plain dict + get() keeps the function free of module-level imports.
    stats = {}
    for word, count in iter:
        stats[word] = stats.get(word, 0) + int(count)

    # items() instead of the Python-2-only iteritems(); same pairs either way.
    for word, total in stats.items():
        out.add(word, total)


job = MapReduceJob(
    inputfilename='sometextfile.txt',
    funmap=fun_map,
    funreduce=fun_reduce)

res = job.execute()

for word, frequency in result_iterator(res):
    # Single-argument print() emits the same "word frequency" line under
    # both the Python 2 print statement and the Python 3 print function.
    print("%s %s" % (word, frequency))

# Commented-out variant that submits the same map/reduce functions through
# Disco(master).new_job instead of the local MapReduceJob:
#
#master = sys.argv[1]
#print "Starting Disco job.."
#print "Go to %s to see status of the job." % master
#results = Disco(master).new_job(
#    name = "wordcount",
#    input = ["http://discoproject.org/chekhov.txt"],
#    map = fun_map,
#    reduce = fun_reduce).wait()
#
#print "Job done. Results:"
#for word, frequency in result_iterator(results):
#    print word, frequency