開発環境
- OS X Lion - Apple(OS)
- BBEdit - Bare Bones Software, Inc., Emacs(Text Editor)
- プログラミング言語: Python
初めてのコンピュータサイエンス(Jennifer Campbell、Paul Gries、Jason Montojo、Greg Wilson(著)長尾 高弘(翻訳))の11章(探索とソート)の11.7(練習問題)を解いてみる。
1.
ソート + 二分探索 * 回数 < 線形探索(組み込みの探索) * 回数N log2 N + n * log2 N < n * N
N log2 N < n(N - log2 N)
(N log2 N) / (N - log2 N) < n
log2 N < n (Nが十分大きい時)
おおよそlog2 N回以上探索した時、組み込みの探索を使うよりも、ソートしてから二分探索した方が高速になる。
確認
コード(BBEdit)
sample.py
#!/usr/bin/env python3.3 #-*- coding: utf-8 -*- import math, time def binary_search(v, L): i = 0 j = len(L) - 1 while i != j + 1: m = (i + j) // 2 if L[m] < v: i = m + 1 else: j = m - 1 if 0 <= i < len(L) and L[i] == v: return i else: return - 1 N = 1000 l = [9,0,8,1,7,2,6,4,5] * N + [10] count = math.floor(math.log2(10 * N + 1)) print("回数、二分探索、組み込み関数(L.index)、比率(二分探索/組み込み関数)") for n in range(count - 10, count + 10): l_1 = l[:] start = time.time() l_1.sort() for x in range(n): binary_search(10, l_1) t1 = time.time() - start start = time.time() for x in range(n): l.index(10) t2 = time.time() - start print("{0}, {1:.10f}秒, {2:.10f}秒, {3:.4f}" .format(n, t1, t2, t1 / t2))
入出力結果(Terminal)
$ ./sample.py 回数、二分探索、組み込み関数(L.index)、比率(二分探索/組み込み関数) 3, 0.0024380684秒, 0.0007178783秒, 3.3962 4, 0.0024659634秒, 0.0009291172秒, 2.6541 5, 0.0023989677秒, 0.0011911392秒, 2.0140 6, 0.0024108887秒, 0.0013840199秒, 1.7419 7, 0.0025100708秒, 0.0016059875秒, 1.5629 8, 0.0024089813秒, 0.0019021034秒, 1.2665 9, 0.0024089813秒, 0.0021131039秒, 1.1400 10, 0.0024271011秒, 0.0023050308秒, 1.0530 11, 0.0024609566秒, 0.0025429726秒, 0.9677 12, 0.0024697781秒, 0.0027589798秒, 0.8952 13, 0.0024509430秒, 0.0030219555秒, 0.8110 14, 0.0024359226秒, 0.0032019615秒, 0.7608 15, 0.0024290085秒, 0.0034270287秒, 0.7088 16, 0.0024490356秒, 0.0036599636秒, 0.6691 17, 0.0024840832秒, 0.0038850307秒, 0.6394 18, 0.0024979115秒, 0.0044231415秒, 0.5647 19, 0.0024981499秒, 0.0043418407秒, 0.5754 20, 0.0024969578秒, 0.0045928955秒, 0.5437 21, 0.0025179386秒, 0.0048110485秒, 0.5234 22, 0.0025010109秒, 0.0050280094秒, 0.4974 $
だいたい真ん中(log2 N回)あたりで比率が1以下になってるから考え方はあってるのかなぁ。
2.
選択ソート
[1, 5, 4, 3, 7, 6, 2] [1, 2, 4, 3, 7, 6, 5] [1, 2, 3, 4, 7, 6, 5] [1, 2, 3, 4, 7, 6, 5] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7]
挿入ソート
[6, 5, 4, 3, 7, 1, 2] [5, 6, 4, 3, 7, 1, 2] [4, 5, 6, 3, 7, 1, 2] [3, 4, 5, 6, 7, 1, 2] [3, 4, 5, 6, 7, 1, 2] [1, 3, 4, 5, 6, 7, 2] [1, 2, 3, 4, 5, 6, 7]
コード(BBEdit)
sample.py
#!/usr/bin/env python3.3 #-*- coding: utf-8 -*- def selection_sort(L): i = 0 while i != len(L): smallest = find_min(L, i) L[i], L[smallest] = L[smallest], L[i] i += 1 print(L) def find_min(L, b): smallest = b i = b + 1 while i != len(L): if L[i] < L[smallest]: smallest = i i += 1 return smallest def insertion_sort(L): i = 0 while i != len(L): insert(L, i) i += 1 print(L) def insert(L, b): i = b while i != 0 and L[i - 1] >= L[b]: i -= 1 value = L[b] del L[b] L.insert(i, value) l1 = [6, 5, 4, 3, 7, 1, 2] l2 = l1[:] selection_sort(l1) print() insertion_sort(l2)
入出力結果(Terminal)
$ ./sample.py [1, 5, 4, 3, 7, 6, 2] [1, 2, 4, 3, 7, 6, 5] [1, 2, 3, 4, 7, 6, 5] [1, 2, 3, 4, 7, 6, 5] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [6, 5, 4, 3, 7, 1, 2] [5, 6, 4, 3, 7, 1, 2] [4, 5, 6, 3, 7, 1, 2] [3, 4, 5, 6, 7, 1, 2] [3, 4, 5, 6, 7, 1, 2] [1, 3, 4, 5, 6, 7, 2] [1, 2, 3, 4, 5, 6, 7] $
3.
コード(BBEdit)
sample.py
#!/usr/bin/env python3.3 #-*- coding: utf-8 -*- import nose def bubble_sort(L): l = len(L) - 1 while l > 0: for i in range(l): if L[i] > L[i+1]: L[i], L[i+1] = L[i+1], L[i] print("\t{0}".format(L)) l -= 1 print(L) def run(original, expected): bubble_sort(original) assert original == expected def test_empty(): run([], []) def test_one(): run([1], [1]) def test_two_ordered(): run([1, 2], [1, 2]) def test_two_reversed(): run([2, 1], [1, 2]) def test_three_identical(): run([3, 3, 3], [3, 3, 3]) def test_three_split(): run([3, 0, 3], [0, 3, 3]) if __name__ == '__main__': bubble_sort([6, 5, 4, 3, 7, 1, 2]) print("ここからnoseテスト") nose.runmodule()
入出力結果(Terminal)
$ ./sample.py [5, 6, 4, 3, 7, 1, 2] [5, 4, 6, 3, 7, 1, 2] [5, 4, 3, 6, 7, 1, 2] [5, 4, 3, 6, 7, 1, 2] [5, 4, 3, 6, 1, 7, 2] [5, 4, 3, 6, 1, 2, 7] [5, 4, 3, 6, 1, 2, 7] [4, 5, 3, 6, 1, 2, 7] [4, 3, 5, 6, 1, 2, 7] [4, 3, 5, 6, 1, 2, 7] [4, 3, 5, 1, 6, 2, 7] [4, 3, 5, 1, 2, 6, 7] [4, 3, 5, 1, 2, 6, 7] [3, 4, 5, 1, 2, 6, 7] [3, 4, 5, 1, 2, 6, 7] [3, 4, 1, 5, 2, 6, 7] [3, 4, 1, 2, 5, 6, 7] [3, 4, 1, 2, 5, 6, 7] [3, 4, 1, 2, 5, 6, 7] [3, 1, 4, 2, 5, 6, 7] [3, 1, 2, 4, 5, 6, 7] [3, 1, 2, 4, 5, 6, 7] [1, 3, 2, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] ここからnoseテスト ...... ---------------------------------------------------------------------- Ran 6 tests in 0.002s OK $
4.
コード(BBEdit)
sample.py
#!/usr/bin/env python3.3 #-*- coding: utf-8 -*- import nose def bubble_sort(L): l = len(L) - 1 n = 0 while n < l: for i in range(l, n, -1): if L[i] < L[i - 1]: L[i - 1], L[i] = L[i], L[i - 1] print("\t{0}".format(L)) n += 1 print(L) def run(original, expected): bubble_sort(original) assert original == expected def test_empty(): run([], []) def test_one(): run([1], [1]) def test_two_ordered(): run([1, 2], [1, 2]) def test_two_reversed(): run([2, 1], [1, 2]) def test_three_identical(): run([3, 3, 3], [3, 3, 3]) def test_three_split(): run([3, 0, 3], [0, 3, 3]) if __name__ == '__main__': bubble_sort([6, 5, 4, 3, 7, 1, 2]) print("ここからnoseテスト") nose.runmodule()
入出力結果(Terminal)
$ ./sample.py [6, 5, 4, 3, 7, 1, 2] [6, 5, 4, 3, 1, 7, 2] [6, 5, 4, 1, 3, 7, 2] [6, 5, 1, 4, 3, 7, 2] [6, 1, 5, 4, 3, 7, 2] [1, 6, 5, 4, 3, 7, 2] [1, 6, 5, 4, 3, 7, 2] [1, 6, 5, 4, 3, 2, 7] [1, 6, 5, 4, 2, 3, 7] [1, 6, 5, 2, 4, 3, 7] [1, 6, 2, 5, 4, 3, 7] [1, 2, 6, 5, 4, 3, 7] [1, 2, 6, 5, 4, 3, 7] [1, 2, 6, 5, 4, 3, 7] [1, 2, 6, 5, 3, 4, 7] [1, 2, 6, 3, 5, 4, 7] [1, 2, 3, 6, 5, 4, 7] [1, 2, 3, 6, 5, 4, 7] [1, 2, 3, 6, 5, 4, 7] [1, 2, 3, 6, 4, 5, 7] [1, 2, 3, 4, 6, 5, 7] [1, 2, 3, 4, 6, 5, 7] [1, 2, 3, 4, 6, 5, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] [1, 2, 3, 4, 5, 6, 7] ここからnoseテスト ...... ---------------------------------------------------------------------- Ran 6 tests in 0.002s OK $
5.
コード(BBEdit)
sample.py
#!/usr/bin/env python3.3 #-*- coding: utf-8 -*- import time def selection_sort(L): i = 0 while i != len(L): smallest = find_min(L, i) L[i], L[smallest] = L[smallest], L[i] i += 1 def find_min(L, b): smallest = b i = b + 1 while i != len(L): if L[i] < L[smallest]: smallest = i i += 1 return smallest def insertion_sort(L): i = 0 while i != len(L): insert(L, i) i += 1 def insert(L, b): i = b while i != 0 and L[i - 1] >= L[b]: i -= 1 value = L[b] del L[b] L.insert(i, value) def bubble_sort(L): l = len(L) - 1 while l > 0: for i in range(l): if L[i] > L[i+1]: L[i], L[i+1] = L[i+1], L[i] l -= 1 def print_times(L): print(len(L)) for func in (insertion_sort, selection_sort, bubble_sort): L_copy = L[:] t1 = time.time() func(L_copy) t2 = time.time() print("\t{0:15} {1:.1f}".format(func.__name__, (t2 - t1) * 1000)) if __name__ == '__main__': for list_size in [10, 1000, 2000, 3000]: L = [x for x in range(list_size)] L.reverse() print_times(L)
入出力結果(Terminal)
$ ./sample.py 10 insertion_sort 0.1 selection_sort 0.1 bubble_sort 0.1 1000 insertion_sort 902.4 selection_sort 552.6 bubble_sort 913.5 2000 insertion_sort 1863.0 selection_sort 2133.3 bubble_sort 4057.8 3000 insertion_sort 4160.9 selection_sort 4995.2 bubble_sort 8486.8 $
リストが大きくなるにつれて、速度に差が出てくる。(速い順に挿入ソート、選択ソート、バブルソート)特にバブルソートは他の2つのソート方法に比べて遅いみたい。
6.
ソートされている場合。
コピーは0回。
比較は、
(N - 1) + (N - 2) + ・・・+ 2 = (N - 1 + 2)(N - 2) / 2 = (N - 2)(N - 3) / 2 回
逆順になっている場合。
比較の回数はソートされている場合と同じ、(N - 2)(N - 3) / 2 回
コピーの回数は比較の都度コピーされるので、
(N - 2)(N - 3) / 2 * 2 =(N - 2)(N - 3)回7.
値の挿入するさいに、挿入する位置より上野値を全て1つずつずらさなければならないという事が考慮されてない。
8.
Nの階乗。要素が一意じゃない場合には、Nの階乗通りの中に、重複するリストができてしまう。
9.
コード(BBEdit)
sample.py
#!/usr/bin/env python3.3 #-*- coding: utf-8 -*- import time def selection_sort(L): i = 0 while i != len(L): smallest = find_min(L, i) L[i], L[smallest] = L[smallest], L[i] i += 1 def find_min(L, b): smallest = b i = b + 1 while i != len(L): if L[i] < L[smallest]: smallest = i i += 1 return smallest def insertion_sort(L): i = 0 while i != len(L): insert(L, i) i += 1 def insert(L, b): i = b while i != 0 and L[i - 1] >= L[b]: i -= 1 value = L[b] del L[b] L.insert(i, value) def bubble_sort(L): l = len(L) - 1 while l > 0: for i in range(l): if L[i] > L[i+1]: L[i], L[i+1] = L[i+1], L[i] l -= 1 def merge(L1, L2): newL = [] i1 = 0 i2 = 0 l = len(L1) + len(L2) i = 0 while i < l: if i1 >= len(L1): newL.append(L2[i2]) i2 += 1 elif i2 >= len(L2): newL.append(L1[i1]) i1 += 1 elif L1[i1] <= L2[i2]: newL.append(L1[i1]) i1 += 1 else: newL.append(L2[i2]) i2 += 1 i += 1 return newL def mergesort(L): workspace = [] for i in range(len(L)): workspace.append([L[i]]) i = 0 while i< len(workspace) - 1: L1 = workspace[i] L2 = workspace[i+1] newL = merge(L1, L2) workspace.append(newL) i += 2 if len(workspace) != 0: L[:] = workspace[-1][:] def default_merge(L1, L2): newL = [] i1 = 0 i2 = 0 while i1 != len(L1) and i2 != len(L2): if L1[i1] <= L2[i2]: newL.append(L1[i1]) i1 += 1 else: newL.append(L2[i2]) i2 += 1 newL.extend(L1[i1:]) newL.extend(L2[i2:]) return newL def default_mergesort(L): workspace = [] for i in range(len(L)): workspace.append([L[i]]) i = 0 while i< len(workspace) - 1: L1 = workspace[i] L2 = workspace[i+1] newL = default_merge(L1, L2) workspace.append(newL) i += 2 if len(workspace) != 0: L[:] = workspace[-1][:] def builtin_sort(L): L.sort() def print_times(L): print(len(L)) for func in (insertion_sort, selection_sort, bubble_sort, mergesort, default_mergesort, builtin_sort): L_copy = L[:] t1 = time.time() func(L_copy) t2 = time.time() print("\t{0:20} {1:>10.1f}".format(func.__name__, (t2 - t1) * 1000)) if __name__ == '__main__': for list_size in [10, 1000, 2000, 3000]: L = [x for x in range(list_size)] L.reverse() print_times(L)
入出力結果(Terminal)
$ ./sample.py 10 insertion_sort 0.0 selection_sort 0.0 bubble_sort 0.0 mergesort 0.1 default_mergesort 0.1 builtin_sort 0.0 1000 insertion_sort 141.2 selection_sort 180.1 bubble_sort 285.5 mergesort 9.5 default_mergesort 6.7 builtin_sort 0.0 2000 insertion_sort 569.7 selection_sort 723.6 bubble_sort 1218.7 mergesort 21.2 default_mergesort 14.2 builtin_sort 0.1 3000 insertion_sort 1409.0 selection_sort 1644.7 bubble_sort 2733.7 mergesort 32.5 default_mergesort 22.1 builtin_sort 0.1 $
速度が少し遅くなってるし、書き換え方法違ってるかも。。
0 コメント:
コメントを投稿