#!/usr/bin/env python
#
# This is a very basic mapper that splits a line of prose into
# individual words. It is written in Python. Its output can be
# used in counting the occurrences of common words in a text.
#
# Each non-empty word read from standard input is written to standard
# output as "<word>\t1", one record per line.
#

import sys


def generateWords(line, delims):
    """Yield the pieces of ``line`` separated by runs of delimiter characters.

    Args:
        line: The text to split.
        delims: A container of single characters (for example a string)
            that are treated as separators. Consecutive delimiters are
            collapsed into a single separator.

    Yields:
        Each substring found between delimiter runs, in order. An empty
        string is yielded when ``line`` starts with a delimiter, and the
        final item is empty when ``line`` ends with a delimiter or is
        itself empty; callers that only want real words must skip the
        empty strings.
    """
    start = 0
    pos = 0
    end = len(line)
    while pos < end:
        if line[pos] in delims:
            # Emit the text gathered so far, then swallow the whole run of
            # delimiters so that repeated separators produce one split.
            yield line[start:pos]
            while pos < end and line[pos] in delims:
                pos += 1
            start = pos
        else:
            pos += 1
    # Whatever follows the last delimiter run (possibly nothing).
    yield line[start:]


def main():
    """Read lines from stdin and write one "<word>\\t1" record per word."""
    # readline() is called explicitly (instead of iterating over the file
    # object) so each line is handled as soon as it is read; it returns
    # the empty string only at EOF, which ends the loop.
    for line in iter(sys.stdin.readline, ""):
        for word in generateWords(line, delims=' \n\t'):
            if word:
                # sys.stdout.write behaves the same on Python 2 and 3,
                # unlike the Python 2-only print statement.
                sys.stdout.write('%s\t%s\n' % (word, 1))
        # Flush once per input line so output keeps streaming to the
        # consumer without paying for a flush on every word.
        sys.stdout.flush()


if __name__ == "__main__":
    main()