2013年4月12日金曜日

開発環境

初めてのコンピュータサイエンス(Jennifer CampbellPaul GriesJason MontojoGreg Wilson(著)長尾 高弘(翻訳))の11章(探索とソート)の11.7(練習問題)を解いてみる。

1.

ソート + 二分探索 * 回数 < 線形探索(組み込みの探索) * 回数
N log2 N + n * log2 N < n * N
N log2 N < n(N - log2 N)
(N log2 N) / (N - log2 N) < n
log2 N < n (Nが十分大きい時)

おおよそlog2 N回以上探索した時、組み込みの探索を使うよりも、ソートしてから二分探索した方が高速になる。

確認

コード(BBEdit)

sample.py

#!/usr/bin/env python3.3
#-*- coding: utf-8 -*-

import math, time

def binary_search(v, L):
    i = 0
    j = len(L) - 1
    
    while i != j + 1:
        m = (i + j) // 2
        if L[m] < v:
            i = m + 1
        else:
            j = m - 1
    
    if 0 <= i < len(L) and L[i] == v:
        return i
    else:
        return - 1

N = 1000
l = [9,0,8,1,7,2,6,4,5] * N + [10]
count = math.floor(math.log2(10 * N + 1))
print("回数、二分探索、組み込み関数(L.index)、比率(二分探索/組み込み関数)")
for n in range(count - 10, count + 10):
    l_1 = l[:]
    start = time.time()
    l_1.sort()
    for x in range(n):
        binary_search(10, l_1)
    t1 = time.time() - start
    start = time.time()
    for x in range(n):
        l.index(10)
    t2 = time.time() - start
    print("{0}, {1:.10f}秒, {2:.10f}秒, {3:.4f}"
        .format(n, t1, t2, t1 / t2))

入出力結果(Terminal)

$ ./sample.py
回数、二分探索、組み込み関数(L.index)、比率(二分探索/組み込み関数)
3, 0.0024380684秒, 0.0007178783秒, 3.3962
4, 0.0024659634秒, 0.0009291172秒, 2.6541
5, 0.0023989677秒, 0.0011911392秒, 2.0140
6, 0.0024108887秒, 0.0013840199秒, 1.7419
7, 0.0025100708秒, 0.0016059875秒, 1.5629
8, 0.0024089813秒, 0.0019021034秒, 1.2665
9, 0.0024089813秒, 0.0021131039秒, 1.1400
10, 0.0024271011秒, 0.0023050308秒, 1.0530
11, 0.0024609566秒, 0.0025429726秒, 0.9677
12, 0.0024697781秒, 0.0027589798秒, 0.8952
13, 0.0024509430秒, 0.0030219555秒, 0.8110
14, 0.0024359226秒, 0.0032019615秒, 0.7608
15, 0.0024290085秒, 0.0034270287秒, 0.7088
16, 0.0024490356秒, 0.0036599636秒, 0.6691
17, 0.0024840832秒, 0.0038850307秒, 0.6394
18, 0.0024979115秒, 0.0044231415秒, 0.5647
19, 0.0024981499秒, 0.0043418407秒, 0.5754
20, 0.0024969578秒, 0.0045928955秒, 0.5437
21, 0.0025179386秒, 0.0048110485秒, 0.5234
22, 0.0025010109秒, 0.0050280094秒, 0.4974
$

だいたい真ん中(log2 N回)あたりで比率が1以下になってるから考え方はあってるのかなぁ。

2.

選択ソート

[1, 5, 4, 3, 7, 6, 2]
[1, 2, 4, 3, 7, 6, 5]
[1, 2, 3, 4, 7, 6, 5]
[1, 2, 3, 4, 7, 6, 5]
[1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]

挿入ソート

[6, 5, 4, 3, 7, 1, 2]
[5, 6, 4, 3, 7, 1, 2]
[4, 5, 6, 3, 7, 1, 2]
[3, 4, 5, 6, 7, 1, 2]
[3, 4, 5, 6, 7, 1, 2]
[1, 3, 4, 5, 6, 7, 2]
[1, 2, 3, 4, 5, 6, 7]

コード(BBEdit)

sample.py

#!/usr/bin/env python3.3
#-*- coding: utf-8 -*-

def selection_sort(L):
    i = 0
    while i != len(L):
        smallest = find_min(L, i)
        L[i], L[smallest] = L[smallest], L[i]
        i += 1
        print(L)

def find_min(L, b):
    smallest = b
    i = b + 1
    while i != len(L):
        if L[i] < L[smallest]:
            smallest = i
        i += 1
    return smallest

def insertion_sort(L):
    i = 0
    while i != len(L):
        insert(L, i)
        i += 1
        print(L)

def insert(L, b):
    i = b
    while i != 0 and L[i - 1] >= L[b]:
        i -= 1
    value = L[b]
    del L[b]
    L.insert(i, value)

l1 = [6, 5, 4, 3, 7, 1, 2]
l2 = l1[:]

selection_sort(l1)
print()
insertion_sort(l2)

入出力結果(Terminal)

$ ./sample.py
[1, 5, 4, 3, 7, 6, 2]
[1, 2, 4, 3, 7, 6, 5]
[1, 2, 3, 4, 7, 6, 5]
[1, 2, 3, 4, 7, 6, 5]
[1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]

[6, 5, 4, 3, 7, 1, 2]
[5, 6, 4, 3, 7, 1, 2]
[4, 5, 6, 3, 7, 1, 2]
[3, 4, 5, 6, 7, 1, 2]
[3, 4, 5, 6, 7, 1, 2]
[1, 3, 4, 5, 6, 7, 2]
[1, 2, 3, 4, 5, 6, 7]
$

3.

コード(BBEdit)

sample.py

#!/usr/bin/env python3.3
#-*- coding: utf-8 -*-

import nose

def bubble_sort(L):
    l = len(L) - 1
    while l > 0:
        for i in range(l):
            if L[i] > L[i+1]:
                L[i], L[i+1] = L[i+1], L[i]
            print("\t{0}".format(L))
        l -= 1
        print(L)

def run(original, expected):
    bubble_sort(original)
    assert original == expected

def test_empty():
    run([], [])

def test_one():
    run([1], [1])

def test_two_ordered():
    run([1, 2], [1, 2])

def test_two_reversed():
    run([2, 1], [1, 2])

def test_three_identical():
    run([3, 3, 3], [3, 3, 3])

def test_three_split():
    run([3, 0, 3], [0, 3, 3])
    
if __name__ == '__main__':
    bubble_sort([6, 5, 4, 3, 7, 1, 2])
    print("ここからnoseテスト")
    nose.runmodule()

入出力結果(Terminal)

$ ./sample.py
 [5, 6, 4, 3, 7, 1, 2]
 [5, 4, 6, 3, 7, 1, 2]
 [5, 4, 3, 6, 7, 1, 2]
 [5, 4, 3, 6, 7, 1, 2]
 [5, 4, 3, 6, 1, 7, 2]
 [5, 4, 3, 6, 1, 2, 7]
[5, 4, 3, 6, 1, 2, 7]
 [4, 5, 3, 6, 1, 2, 7]
 [4, 3, 5, 6, 1, 2, 7]
 [4, 3, 5, 6, 1, 2, 7]
 [4, 3, 5, 1, 6, 2, 7]
 [4, 3, 5, 1, 2, 6, 7]
[4, 3, 5, 1, 2, 6, 7]
 [3, 4, 5, 1, 2, 6, 7]
 [3, 4, 5, 1, 2, 6, 7]
 [3, 4, 1, 5, 2, 6, 7]
 [3, 4, 1, 2, 5, 6, 7]
[3, 4, 1, 2, 5, 6, 7]
 [3, 4, 1, 2, 5, 6, 7]
 [3, 1, 4, 2, 5, 6, 7]
 [3, 1, 2, 4, 5, 6, 7]
[3, 1, 2, 4, 5, 6, 7]
 [1, 3, 2, 4, 5, 6, 7]
 [1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]
 [1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]
ここからnoseテスト
......
----------------------------------------------------------------------
Ran 6 tests in 0.002s

OK
$

4.

コード(BBEdit)

sample.py

#!/usr/bin/env python3.3
#-*- coding: utf-8 -*-

import nose

def bubble_sort(L):
    l = len(L) - 1
    n = 0
    while n < l:
        for i in range(l, n, -1):
            if L[i] < L[i - 1]:
                L[i - 1], L[i] = L[i], L[i - 1]
            print("\t{0}".format(L))
        n += 1
        print(L)

def run(original, expected):
    bubble_sort(original)
    assert original == expected

def test_empty():
    run([], [])

def test_one():
    run([1], [1])

def test_two_ordered():
    run([1, 2], [1, 2])

def test_two_reversed():
    run([2, 1], [1, 2])

def test_three_identical():
    run([3, 3, 3], [3, 3, 3])

def test_three_split():
    run([3, 0, 3], [0, 3, 3])
    
if __name__ == '__main__':
    bubble_sort([6, 5, 4, 3, 7, 1, 2])
    print("ここからnoseテスト")
    nose.runmodule()

入出力結果(Terminal)

$ ./sample.py
 [6, 5, 4, 3, 7, 1, 2]
 [6, 5, 4, 3, 1, 7, 2]
 [6, 5, 4, 1, 3, 7, 2]
 [6, 5, 1, 4, 3, 7, 2]
 [6, 1, 5, 4, 3, 7, 2]
 [1, 6, 5, 4, 3, 7, 2]
[1, 6, 5, 4, 3, 7, 2]
 [1, 6, 5, 4, 3, 2, 7]
 [1, 6, 5, 4, 2, 3, 7]
 [1, 6, 5, 2, 4, 3, 7]
 [1, 6, 2, 5, 4, 3, 7]
 [1, 2, 6, 5, 4, 3, 7]
[1, 2, 6, 5, 4, 3, 7]
 [1, 2, 6, 5, 4, 3, 7]
 [1, 2, 6, 5, 3, 4, 7]
 [1, 2, 6, 3, 5, 4, 7]
 [1, 2, 3, 6, 5, 4, 7]
[1, 2, 3, 6, 5, 4, 7]
 [1, 2, 3, 6, 5, 4, 7]
 [1, 2, 3, 6, 4, 5, 7]
 [1, 2, 3, 4, 6, 5, 7]
[1, 2, 3, 4, 6, 5, 7]
 [1, 2, 3, 4, 6, 5, 7]
 [1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]
 [1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7]
ここからnoseテスト
......
----------------------------------------------------------------------
Ran 6 tests in 0.002s

OK
$

5.

コード(BBEdit)

sample.py

#!/usr/bin/env python3.3
#-*- coding: utf-8 -*-

import time

def selection_sort(L):
    i = 0
    while i != len(L):
        smallest = find_min(L, i)
        L[i], L[smallest] = L[smallest], L[i]
        i += 1

def find_min(L, b):
    smallest = b
    i = b + 1
    while i != len(L):
        if L[i] < L[smallest]:
            smallest = i
        i += 1
    return smallest

def insertion_sort(L):
    i = 0
    while i != len(L):
        insert(L, i)
        i += 1

def insert(L, b):
    i = b
    while i != 0 and L[i - 1] >= L[b]:
        i -= 1
    value = L[b]
    del L[b]
    L.insert(i, value)

def bubble_sort(L):
    l = len(L) - 1
    while l > 0:
        for i in range(l):
            if L[i] > L[i+1]:
                L[i], L[i+1] = L[i+1], L[i]
        l -= 1

def print_times(L):
    print(len(L))
    for func in (insertion_sort, selection_sort, bubble_sort):
        L_copy = L[:]
        t1 = time.time()
        func(L_copy)
        t2 = time.time()
        print("\t{0:15} {1:.1f}".format(func.__name__, (t2 - t1) * 1000))

if __name__ == '__main__':
    for list_size in [10, 1000, 2000, 3000]:
        L = [x for x in range(list_size)]
        L.reverse()
        print_times(L)

入出力結果(Terminal)

$ ./sample.py
10
 insertion_sort  0.1
 selection_sort  0.1
 bubble_sort     0.1
1000
 insertion_sort  902.4
 selection_sort  552.6
 bubble_sort     913.5
2000
 insertion_sort  1863.0
 selection_sort  2133.3
 bubble_sort     4057.8
3000
 insertion_sort  4160.9
 selection_sort  4995.2
 bubble_sort     8486.8
$

リストが大きくなるにつれて、速度に差が出てくる。(速い順に挿入ソート、選択ソート、バブルソート)特にバブルソートは他の2つのソート方法に比べて遅いみたい。

6.

ソートされている場合。

コピーは0回。

比較は、

(N - 1) + (N - 2) + ・・・+ 2 = (N - 1 + 2)(N - 2) / 2 = (N - 2)(N - 3) / 2 回

逆順になっている場合。

比較の回数はソートされている場合と同じ、(N - 2)(N - 3) / 2 回

コピーの回数は比較の都度コピーされるので、

(N - 2)(N - 3) / 2 * 2 =(N - 2)(N - 3)回

7.

値の挿入するさいに、挿入する位置より上野値を全て1つずつずらさなければならないという事が考慮されてない。

8.

Nの階乗。要素が一意じゃない場合には、Nの階乗通りの中に、重複するリストができてしまう。

9.

コード(BBEdit)

sample.py

#!/usr/bin/env python3.3
#-*- coding: utf-8 -*-

import time

def selection_sort(L):
    i = 0
    while i != len(L):
        smallest = find_min(L, i)
        L[i], L[smallest] = L[smallest], L[i]
        i += 1

def find_min(L, b):
    smallest = b
    i = b + 1
    while i != len(L):
        if L[i] < L[smallest]:
            smallest = i
        i += 1
    return smallest

def insertion_sort(L):
    i = 0
    while i != len(L):
        insert(L, i)
        i += 1

def insert(L, b):
    i = b
    while i != 0 and L[i - 1] >= L[b]:
        i -= 1
    value = L[b]
    del L[b]
    L.insert(i, value)

def bubble_sort(L):
    l = len(L) - 1
    while l > 0:
        for i in range(l):
            if L[i] > L[i+1]:
                L[i], L[i+1] = L[i+1], L[i]
        l -= 1

def merge(L1, L2):
    newL = []
    i1 = 0
    i2 = 0
    l = len(L1) + len(L2)
    i = 0
    while i < l:
        if i1 >= len(L1):
            newL.append(L2[i2])
            i2 += 1
        elif i2 >= len(L2):
            newL.append(L1[i1])
            i1 += 1
        elif L1[i1] <= L2[i2]:
            newL.append(L1[i1])
            i1 += 1
        else:
            newL.append(L2[i2])
            i2 += 1
        i += 1

    return newL

def mergesort(L):
    workspace = []
    for i in range(len(L)):
        workspace.append([L[i]])
    i = 0
    while i< len(workspace) - 1:
        L1 = workspace[i]
        L2 = workspace[i+1]
        newL = merge(L1, L2)
        workspace.append(newL)
        i += 2
    if len(workspace) != 0:
        L[:] = workspace[-1][:]

def default_merge(L1, L2):
    newL = []
    i1 = 0
    i2 = 0
    while i1 != len(L1) and i2 != len(L2):
        if L1[i1] <= L2[i2]:
            newL.append(L1[i1])
            i1 += 1
        else:
            newL.append(L2[i2])
            i2 += 1

    newL.extend(L1[i1:])
    newL.extend(L2[i2:])

    return newL

def default_mergesort(L):
    workspace = []
    for i in range(len(L)):
        workspace.append([L[i]])
    i = 0
    while i< len(workspace) - 1:
        L1 = workspace[i]
        L2 = workspace[i+1]
        newL = default_merge(L1, L2)
        workspace.append(newL)
        i += 2
    if len(workspace) != 0:
        L[:] = workspace[-1][:]

def builtin_sort(L):
    L.sort()
def print_times(L):
    print(len(L))
    for func in (insertion_sort, selection_sort, bubble_sort,
                 mergesort, default_mergesort, builtin_sort):
        L_copy = L[:]
        t1 = time.time()
        func(L_copy)
        t2 = time.time()
        print("\t{0:20} {1:>10.1f}".format(func.__name__, (t2 - t1) * 1000))

if __name__ == '__main__':
    for list_size in [10, 1000, 2000, 3000]:
        L = [x for x in range(list_size)]
        L.reverse()
        print_times(L)

入出力結果(Terminal)

$ ./sample.py
10
 insertion_sort              0.0
 selection_sort              0.0
 bubble_sort                 0.0
 mergesort                   0.1
 default_mergesort           0.1
 builtin_sort                0.0
1000
 insertion_sort            141.2
 selection_sort            180.1
 bubble_sort               285.5
 mergesort                   9.5
 default_mergesort           6.7
 builtin_sort                0.0
2000
 insertion_sort            569.7
 selection_sort            723.6
 bubble_sort              1218.7
 mergesort                  21.2
 default_mergesort          14.2
 builtin_sort                0.1
3000
 insertion_sort           1409.0
 selection_sort           1644.7
 bubble_sort              2733.7
 mergesort                  32.5
 default_mergesort          22.1
 builtin_sort                0.1
$

速度が少し遅くなってるし、書き換え方法違ってるかも。。

0 コメント:

コメントを投稿