2016年8月15日月曜日

開発環境

Think Python (Allen B. Downey (著)、 O'Reilly Media)のChapter 13.(Case Study: Data Structure Selection)のExercises 13-3.(No. 2926)に取り組んでみる。

Exercises 13-3.(No. 2926)

コード(Emacs)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import requests
import string


def get_words(filename):
    """Count how often each word occurs in a text file.

    Every line up to and including the first line that starts with
    ``'***'`` is skipped.  In the remaining lines each character of
    ``string.punctuation`` acts as a word separator, and words are
    lower-cased before they are counted.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each lower-cased word to its number of occurrences.
        The dict is empty when no line of the file starts with ``'***'``.
    """
    # One translation table turns every ASCII punctuation character into a
    # space in a single pass, instead of one str.replace call per character
    # on every line.
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation))
    words = {}
    with open(filename) as f:
        # Consume the preamble; the marker line itself is discarded as well.
        for line in f:
            if line.startswith('***'):
                break
        # The same file iterator resumes right after the marker line.
        for line in f:
            # split() drops all whitespace and the punctuation is already
            # gone, so the pieces need no further stripping.
            for word in line.translate(punctuation_to_space).split():
                word = word.lower()
                words[word] = words.get(word, 0) + 1
    return words


if __name__ == '__main__':
    # Report the most frequent words of the book, one frequency per line.
    filename = 'History_of_a_Six_Weeks_Tour_by_Shelley_and_Shelley.txt'
    words = get_words(filename)
    print('The 20 most frequently used words in the book.')

    # Invert the word -> frequency mapping into frequency -> list of words.
    count = {}
    for word, freq in words.items():
        count.setdefault(freq, []).append(word)

    # Walk the frequencies from highest to lowest.  Words sharing a
    # frequency are printed together, so the loop stops once at least 20
    # words have been shown (possibly more when the last group has ties).
    shown = 0
    for freq in sorted(count, reverse=True):
        group = count[freq]
        print('{0}: {1}'.format(freq, ', '.join(group)))
        shown += len(group)
        if shown >= 20:
            break

入出力結果(Terminal, IPython)

$ ./sample3.py
The 20 most frequently used words in the book.
1870: the
985: of
828: and
494: a
486: to
414: we
399: in
252: that
246: with
229: which
211: was
196: on
185: it
179: this
172: at
168: is
163: our
159: by
155: from
153: as
$

0 コメント:

コメントを投稿