def print_stats(word_iterator):
    """Print word count, longest/shortest word, and average word length
    for an iterable of words (each word is stripped of whitespace first)."""
    longest = ""
    shortest = ""
    total_length = 0
    count = 0
    for word in word_iterator:
        word = word.strip()
        if len(word) > len(longest):
            longest = word
        # the first word always becomes the initial shortest
        if shortest == "" or len(word) < len(shortest):
            shortest = word
        total_length += len(word)
        count += 1
    print("Number of words: " + str(count))
    print("Longest word: " + longest)
    print("Shortest word: " + shortest)
    # guard against an empty iterable (the original raised ZeroDivisionError)
    avg = total_length / count if count else 0.0
    print("Avg. word length: " + str(avg))


def sentence_stats(sentence):
    """Split *sentence* on whitespace and report stats for its words."""
    print_stats(sentence.split())


# I've included the "english.txt" file in the examples directory (see the
# course web page) if you'd like to try out this example with it
def file_stats(filename):
    """Report stats treating each LINE of *filename* as one 'word'
    (each line is stripped before measuring, as in print_stats)."""
    # 'with' guarantees the file is closed even if print_stats raises
    with open(filename, "r") as opened_file:
        print_stats(opened_file)


def general_print_stats(line_iterator):
    """Like print_stats, but each item of *line_iterator* is a line that is
    lower-cased and split into individual words before being counted."""
    longest = ""
    shortest = ""
    total_length = 0
    count = 0
    for line in line_iterator:
        words = line.strip().lower().split()
        for word in words:
            if len(word) > len(longest):
                longest = word
            if shortest == "" or len(word) < len(shortest):
                shortest = word
            total_length += len(word)
            count += 1
    print("Number of words: " + str(count))
    print("Longest word: " + longest)
    print("Shortest word: " + shortest)
    # guard against empty input (the original raised ZeroDivisionError)
    avg = total_length / count if count else 0.0
    print("Avg. word length: " + str(avg))


def general_file_stats(filename):
    """Report per-word stats over every line of *filename*."""
    with open(filename, "r") as opened_file:
        general_print_stats(opened_file)


# if you're curious, I had to clean up the data a bit and this
# is the function I used to do it
def cleanup_data(infile, outfile):
    """Lower-case *infile*, keep only purely-alphabetic words, and write the
    cleaned sentences (one per input line) to *outfile*."""
    # both files are closed automatically, even on error
    with open(infile, "r") as reader, open(outfile, "w") as output:
        for line in reader:
            words = line.strip().lower().split()
            # keep only words made entirely of letters
            cleaned = [word for word in words if word.isalpha()]
            output.write(" ".join(cleaned) + "\n")