資料結構之效率比較

Github 連結: https://github.com/xwk1246/data-structure-compare

系統環境

OS: Manjaro 21.0.2 Ornara

Kernel: x86_64 Linux 5.10.30-1-MANJARO

CPU: Intel Core i7-4790K @ 8x 4.4GHz

GPU: GeForce GTX 1070

RAM: 16GB DDR3 1866MHz

資料集生成方法

使用 rand 函式生成範圍內整數並寫入檔案

insert data set

產生指定數量整數，每筆數字是唯一，用來 insert 進資料結構

int genInsertDataSet(int dataSize) {
    int i;
    int* exist = (int*)malloc(sizeof(int) * MAXDATALEN);　//建立重複檢查表
    memset(exist, 0, MAXDATALEN);　//將檢查表全部值初始化為0
    FILE* fp = fopen("./insertdataset.txt", "w");　//以write模式開啟檔案，如不存在則新增
    for (i = 0; i < dataSize; i++) {　//重複dataSize次
        int tmp;
        do {
            tmp = rand() % 10000000;　//僅隨機取10,000,000以內整數
        }
        while (exist[tmp]);　//當數字已經出現時再執行一次，直到沒出現過
        fprintf(fp, "%d\n", tmp);　//寫入檔案並換行
        exist[tmp] = 1;　//紀錄為已出現過
    }
    free(exist);　//清除空間
    fclose(fp);　//關檔
}

query data set

產生指定數量整數，用來查詢資料結構是否存在該值

int genQueryDataSet(int datasize) {
    int i;
    int tmp;
    FILE* fp = fopen("./querydataset.txt", "w");　//以write模式開啟檔案，如不存在則新增
    for (i = 0; i < datasize; i++) {　//重複dataSize次
        tmp = rand() % MAXDATALEN; //僅隨機取10,000,000以內整數
        fprintf(fp, "%d\n", tmp); //寫入檔案並換行
    }
    fclose(fp); //關檔
}

資料結構實做

Linked list

每個 node 儲存資料並儲存指向下一個或前一個 node 位置的指標

insert

將目標 insert 進 Linked list 前方

struct LinkedList* ll_insert(struct LinkedList* node, int target) {
    struct LinkedList* ptr;
    ptr = (struct LinkedList*)malloc(sizeof(struct LinkedList));　//配置空間給節點
    ptr->data = target;　//將資料存入節點
    if (!node) {　//如果傳入資料結構為NULL
        ptr->next = NULL;　//將新增的節點下個節點指向NULL
        return ptr;　//回傳新建的指標，為linked list最前方節點
    }
    ptr->next = node;　//將新建的節點下個位置指向資料結構目前最前方節點
    return ptr;　//回傳新建的指標，為linked list最前方節點
}

find

用依序尋訪的方式來尋找

struct LinkedList* ll_find(struct LinkedList* node, int target) {
    struct LinkedList* current;
    current = node; //當前節點
    while (current != NULL) {　//當節點存在
        if (current->data == target)　//當節點資料等於目標
            return current;　//回傳符合的節點
        current = current->next;　//不符合時將當前節點設為下一個節點位置
    }
    return NULL;　//到linked list結束時仍未找到則回傳NULL
}

Array

insert

將目標 insert 進該 array 中

struct Arr* arr_insert(struct Arr* arr, int target) {
    struct Arr* ptr;
    if (!arr) { //當array不存在時
        ptr = (struct Arr*)malloc(sizeof(struct Arr));　//分配空間給array
        ptr->data[0] = target;　//將數字放進第0個位置
        ptr->tail = 1;　//儲存下一筆資料該儲存的位置
        return ptr;　//回傳新建的array struct
    }
    arr->data[arr->tail] = target; //array存在時則在最後面新增值
    arr->tail++;　//把下個開始位置加一
    return arr;　//回傳陣列指標
}

find with binary search

用二元搜尋法在 array 中尋找值

int* arr_bs_find(struct Arr* arr, int target) {
    int front, back;
    int mid;
    int i;
    front = 0;　//前方設為array開頭
    back = arr->tail - 1; //將後方設為陣列最後一個有存資料的位置
    mid = (front + back) / 2;　//中間等於前後相加除二
    while (front <= back) {　//當前後位置未交換時
        mid = (front + back) / 2;　//取得中間值
        if (target > arr->data[mid]) {　//當值比中間大時
            front = mid + 1; //前等於中間
        }
        else if (target < arr->data[mid]) { //當值比中間小時
            back = mid - 1; //後等於中間
        }
        else if (target == arr->data[mid]) {　//當值等於中間時
            return &(arr->data[mid]);　//表示找到並回傳
        }
    }
    return NULL;　//當前後碰撞仍未找到則回傳NULL
}

find with traversal

用尋訪的方式來比對值並找到

int* arr_traverse_find(struct Arr* arr, int target) {
    int i;
    for (i = 0; i < arr->tail; i++) {　//從頭到array結尾訪問
        if (arr->data[i] == target) {　//當找到時
            return &arr->data[i]; //回傳
            break;
        }
    }
    return NULL;　//當尋訪完仍未找到，回傳NULL
}

Binary Search Tree

insert

將目標 insert 進該資料結構中

struct Bst* bst_insert(struct Bst* node, int target) {
    struct Bst* ptr;
    if (!node) { //當bst不存在
        ptr = (struct Bst*)malloc(sizeof(struct Bst));　//分配新空間
        ptr->data = target; //將值儲存進節點
        ptr->lchild = NULL; //左child等於NULL
        ptr->rchild = NULL; //右chile等於NULL
        return ptr; //回傳建立的bst
    }
    if (target < node->data)　//當值小於當前node值
        node->lchild = bst_insert(node->lchild, target); //遞迴重複執行
    else if (target > node->data)　//當值大於當前node值
        node->rchild = bst_insert(node->rchild, target);　//遞迴重複執行
    return node;　//回傳root
}

find

struct Bst* bst_find(struct Bst* node, int target) {
    if (!node) { //當遞迴到樹末端
        return NULL;　//回傳NULL
    }
    if (target < node->data)　//當值小於當前node值
        return bst_find(node->lchild, target);　//遞迴重複執行
    else if (target > node->data)　//當值大於當前node值
        return bst_find(node->rchild, target);　//遞迴重複執行
    else {
        return node;　//回傳找到的node
    }
}

Hash

hash function

以原始數字透過 hash function 產生一組新數

int hash65(int num) {
    long long sum;
    int i;
    char word[8];
    sprintf(word, "%d", num); //將整數轉為字串
    int len = strlen(word);　//取得位數
    sum = 0;　//初始總和為0
    for (i = 0; i < len; i++) {
        sum = word[i] + sum * 65;　//sum為第n位字符編碼*65^n-1相加
    }
    return (int)(sum % 10000000);　回傳保證範圍為10,000,000以內
}

insert

將目標 insert 進該資料結構中

struct hash_node** hash_insert(struct hash_node** hashTable, int target) {
    int hv;
    hv = hash65(target); //將值轉為hash後value
    if (!hashTable) { //當hash table不存在
        hashTable = malloc(sizeof(struct hash_node) * MAXHASH);　//配置記憶體空間
        memset(hashTable, 0, MAXHASH);　//初始匯為0
    }
    hashTable[hv] = hash_ll_insert(hashTable[hv], target); //在hash table該value所在位置加入值
    return hashTable;　//回傳hash table位置
}

handle collision

當遇到 hash value 衝突，使用 linked list insert at front

struct hash_node* hash_ll_insert(struct hash_node* node, int target) {
    struct hash_node* ptr = (struct hash_node*)malloc(sizeof(struct hash_node));　//建立新node
    ptr->key = target;　//在node儲存值
    if (!node) { //當hash table該位置為空
        ptr->next = NULL; //node下個位置設為NULL
        ptr->cnt = 1; //為第一個值
    }
    else {　//當hash table有重複
        ptr->next = node;　//將新node新增在已存在node前
        ptr->cnt = node->cnt + 1;　//重複值數量加一
    }
    return ptr;　//回傳該node
}

find

struct hash_node* hash_find(struct hash_node** hashTable, int target) {
    int hv = hash65(target); //將值轉為hash後value
    struct hash_node* current = hashTable[hv]; //儲存當前node
    current = hashTable[hv]; //當前node為hash table該hv位置
    while (current != NULL) {　//traverse直到結尾
        if (current->key == target) //當找到時
            return current;　//回傳節點
        current = current->next; //未找到則繼續尋找
    }
    return NULL; //仍未找到回傳NULL
}

執行時間

(取三次結果，同一次 test 使用相同測資，使用參數-d 1e5 -q 1e4，單位為毫秒)

第一次結果

bst:
building time: 32.90 ms
query time: 3.56 ms

bs:
building time: 0.56 ms
query time: 1.54 ms

arr:
building time: 0.51 ms
query time: 1432.03 ms

ll:
building time: 2.47 ms
query time: 2622.17 ms

hash:
building time: 53.86 ms
query time: 1.64 ms

第二次結果

bst:
building time: 30.80 ms
query time: 3.52 ms

bs:
building time: 0.68 ms
query time: 1.70 ms

arr:
building time: 0.52 ms
query time: 1422.75 ms

ll:
building time: 2.49 ms
query time: 2549.99 ms

hash:
building time: 53.55 ms
query time: 1.52 ms

第三次結果

bst:
building time: 31.70 ms
query time: 2.99 ms

bs:
building time: 0.57 ms
query time: 1.81 ms

arr:
building time: 0.59 ms
query time: 1409.55 ms

ll:
building time: 2.31 ms
query time: 2596.34 ms

hash:
building time: 53.41 ms
query time: 1.57 ms

花費時間

build

Hash > Binary Search Tree > Linked List > Array with binary search = Array with traversal search

query

Linked List > Array with traversal search > Array with binary search > Binary Search Tree > Hash

速度

build

Array with traversal search = Array with binary search > Linked List > Binary Search Tree > Hash

query

Hash > Binary Search Tree > Array with binary search > Array with traversal search > Linked List

時間複雜度

build

Linked list: O(n)

Array: O(n)

Binary Search Tree: Best case O(nlogn), Worst case O(n^2)

Hash: O(n)

query

Linked list: O(n)

Array with binary search: O(log(n))

Array with traversal search: O(n)

Binary Search Tree: O(log(n))

Hash: O(1)

空間複雜度

build

Linked list: O(n)

Array: O(n)

Binary Search Tree: O(n)

Hash: O(1)

query

Linked list: O(n)

Array with binary search: O(n)

Array with traversal search: O(n)

Binary Search Tree: O(n)

Hash: O(1)

總結

結構建立上 hash 花費時間較多其次為 Binary Search Tree，其餘 linked list 與 array 時間相近。

查詢上 Hash 最快其次有運用到 Binary search 的查詢速度較快，最慢為依序尋訪。

在建立或查詢資料較多時，花費時間根據不同資料結構差異相當大，尤其是一般的尋訪搜尋常常會遇到搜尋不完的情形。

心得

透過實做這些資料結構可以提昇指標和結構的熟練度，也對遞迴更加熟悉。

Reference

https://gateoverflow.in/50681/time-complexity-of-binary-tree-and-bst

https://medium.com/appworks-school/%E5%88%9D%E5%AD%B8%E8%80%85%E5%AD%B8%E6%BC%94%E7%AE%97%E6%B3%95-%E5%BE%9E%E6%99%82%E9%96%93%E8%A4%87%E9%9B%9C%E5%BA%A6%E8%AA%8D%E8%AD%98%E5%B8%B8%E8%A6%8B%E6%BC%94%E7%AE%97%E6%B3%95-%E4%B8%80-b46fece65ba5

Name		Name	Last commit message	Last commit date
Latest commit History 6 Commits
.vscode		.vscode
.gitignore		.gitignore
README.md		README.md
array.c		array.c
array.h		array.h
bst.c		bst.c
bst.h		bst.h
datagenerator.c		datagenerator.c
datagenerator.h		datagenerator.h
hash.c		hash.c
hash.h		hash.h
linkedlist.c		linkedlist.c
linkedlist.h		linkedlist.h
main		main
main.c		main.c
makefile		makefile

xwk1246/data-structure-compare

Folders and files

Latest commit

History

Repository files navigation

資料結構之效率比較

系統環境

資料集生成方法

insert data set

query data set

資料結構實做

Linked list

insert

find

Array

insert

find with binary search

find with traversal

Binary Search Tree

insert

find

Hash

hash function

insert

handle collision

find

執行時間

(取三次結果，同一次 test 使用相同測資，使用參數-d 1e5 -q 1e4，單位為毫秒)

第一次結果

第二次結果

第三次結果

花費時間

build

query

速度

build

query

時間複雜度

build

Linked list: O(n)

Array: O(n)

Binary Search Tree: Best case O(nlogn), Worst case O(n^2)

Hash: O(n)

query

Linked list: O(n)

Array with binary search: O(log(n))

Array with traversal search: O(n)

Binary Search Tree: O(log(n))

Hash: O(1)

空間複雜度

build

Linked list: O(n)

Array: O(n)

Binary Search Tree: O(n)

Hash: O(1)

query

Linked list: O(n)

Array with binary search: O(n)

Array with traversal search: O(n)

Binary Search Tree: O(n)

Hash: O(1)

總結

心得

Reference

About

Resources

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages