def print_stats(word_iterator):
    """Print word count, longest/shortest word, and average word length
    for an iterable of words (each word is stripped of whitespace first)."""
    longest = ""
    shortest = ""
    total_length = 0
    count = 0
    for word in word_iterator:
        word = word.strip()
        if len(word) > len(longest):
            longest = word
        # the first word always becomes the initial shortest
        if shortest == "" or len(word) < len(shortest):
            shortest = word
        total_length += len(word)
        count += 1
    print("Number of words: " + str(count))
    print("Longest word: " + longest)
    print("Shortest word: " + shortest)
    # guard against an empty iterable (the original raised ZeroDivisionError)
    avg = total_length / count if count else 0.0
    print("Avg. word length: " + str(avg))


def sentence_stats(sentence):
    """Split *sentence* on whitespace and report stats for its words."""
    print_stats(sentence.split())


# I've included the "english.txt" file in the examples directory (see the
# course web page) if you'd like to try out this example with it
def file_stats(filename):
    """Report stats treating each LINE of *filename* as one 'word'
    (each line is stripped before measuring, as in print_stats)."""
    # 'with' guarantees the file is closed even if print_stats raises
    with open(filename, "r") as opened_file:
        print_stats(opened_file)


def general_print_stats(line_iterator):
    """Like print_stats, but each item of *line_iterator* is a line that is
    lower-cased and split into individual words before being counted."""
    longest = ""
    shortest = ""
    total_length = 0
    count = 0
    for line in line_iterator:
        words = line.strip().lower().split()
        for word in words:
            if len(word) > len(longest):
                longest = word
            if shortest == "" or len(word) < len(shortest):
                shortest = word
            total_length += len(word)
            count += 1
    print("Number of words: " + str(count))
    print("Longest word: " + longest)
    print("Shortest word: " + shortest)
    # guard against empty input (the original raised ZeroDivisionError)
    avg = total_length / count if count else 0.0
    print("Avg. word length: " + str(avg))


def general_file_stats(filename):
    """Report per-word stats over every line of *filename*."""
    with open(filename, "r") as opened_file:
        general_print_stats(opened_file)


# if you're curious, I had to clean up the data a bit and this
# is the function I used to do it
def cleanup_data(infile, outfile):
    """Lower-case *infile*, keep only purely-alphabetic words, and write the
    cleaned sentences (one per input line) to *outfile*."""
    # both files are closed automatically, even on error
    with open(infile, "r") as reader, open(outfile, "w") as output:
        for line in reader:
            words = line.strip().lower().split()
            # keep only words made entirely of letters
            cleaned = [word for word in words if word.isalpha()]
            output.write(" ".join(cleaned) + "\n")