Skip to content

Commit

Permalink
add new example for parallel loading
Browse files Browse the repository at this point in the history
  • Loading branch information
38 committed Feb 25, 2022
1 parent db587ea commit 7f96bf1
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 11 deletions.
16 changes: 16 additions & 0 deletions .vscode/c_cpp_properties.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [],
"compilerPath": "/usr/bin/clang",
"cStandard": "c17",
"cppStandard": "c++14",
"intelliSenseMode": "linux-clang-x64"
}
],
"version": 4
}
7 changes: 6 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,10 @@
"clus",
"framefile",
"ptab"
]
],
"files.associations": {
"*.pss": "pscript",
"d4.h": "c",
"stdlib.h": "c"
}
}
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2020-2021 Hao Hou<[email protected]>
Copyright (c) 2020-2022 Hao Hou<[email protected]>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
Expand Down
2 changes: 1 addition & 1 deletion d4binding/example/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ DETECTED_SO_DIR=$(shell dirname $(DETECTED_SO) 2> /dev/null)
DETECTED_CONFIG=$(shell basename $(DETECTED_SO_DIR) 2> /dev/null)
CONFIG:=$(if $(DETECTED_CONFIG),$(DETECTED_CONFIG),debug)
BIND_BIN_DIR=../../target/$(CONFIG)
OBJS=read create read-interval error mean-depth
OBJS=read create read-interval error mean-depth multithread-read

all: print_config $(OBJS:%=bin/%) bin/libd4binding.so #bin/sample.d4

Expand Down
118 changes: 118 additions & 0 deletions d4binding/example/multithread-read.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <d4.h>

// Describes the region to load and the destination buffer.
// A pointer to one of these is passed to the task callbacks as `extra_data`.
typedef struct {
// Chromosome name; init() compares it with each task part's chromosome.
const char* chrom;
// First position of the requested region; buffer[0] corresponds to this position.
uint32_t start;
// Upper bound of the requested region; init() treats it as exclusive.
uint32_t end;
// Destination array, indexed by (position - start).
int* buffer;
} load_request_t;

// Per-part context created by init() and consumed by proc().
typedef struct {
// Where this part stores its values; NULL when the part is outside the request.
int* buf;
// Maximum number of values this part may store at `buf`.
size_t count;
} task_ctx_t;

// Creates the context for one task part.
//
// handle:     the task part being set up; queried for its chromosome and range.
// extra_data: pointer to the load_request_t describing the requested region.
//
// Returns a heap-allocated task_ctx_t (released later by clean()), or NULL if
// the allocation fails. The context is left "inactive" (buf == NULL,
// count == 0) when the part does not belong to the requested region.
void* init(d4_task_part_t* handle, void* extra_data)
{
    load_request_t const* load_req = (load_request_t const*)extra_data;
    task_ctx_t* ret = (task_ctx_t*)malloc(sizeof(*ret));

    if(ret == NULL) return NULL;

    ret->buf = NULL;
    ret->count = 0;

    // Large enough for long contig names; a name that does not fit could not
    // match the request. NOTE(review): confirm how d4_task_chrom reports
    // truncation.
    char name[256] = "";
    uint32_t l = 0, r = 0;
    d4_task_chrom(handle, name, sizeof(name));
    d4_task_range(handle, &l, &r);

    // proc() reads starting at the part's own begin position, so only parts
    // that begin inside [start, end) can be mapped onto the buffer. Requiring
    // l >= start also keeps (l - start) from wrapping around.
    if(strcmp(name, load_req->chrom) == 0 && l >= load_req->start && l < load_req->end)
    {
        ret->buf = load_req->buffer + (l - load_req->start);
        // Clamp the part to the end of the requested region.
        ret->count = (r < load_req->end) ? r - l : load_req->end - l;
    }
    return ret;
}

// Reads the values of one task part into the slot prepared by init().
//
// handle:       the task part to read from.
// task_context: the task_ctx_t returned by init() for this part.
// extra_data:   unused.
//
// Returns 0 on success and -1 if the context is missing or a read makes no
// progress. NOTE(review): -1 is assumed to be how a failed part is reported;
// confirm against the task API.
int proc(d4_task_part_t* handle, void* task_context, void* extra_data)
{
    (void)extra_data;

    task_ctx_t* ctx = (task_ctx_t*)task_context;
    if(ctx == NULL) return -1;

    uint32_t l = 0, r = 0;
    d4_task_range(handle, &l, &r);

    // `filled` counts values already stored; an inactive context (count == 0)
    // skips the loop entirely.
    size_t filled = 0;
    while(l < r && filled < ctx->count)
    {
        long n = (long)d4_task_read_values(handle, l, ctx->buf + filled, ctx->count - filled);

        // A zero or negative result would otherwise loop forever.
        if(n <= 0) return -1;

        l += (uint32_t)n;
        filled += (size_t)n;
    }
    return 0;
}

// Finalizer: releases the context that init() allocated for every part.
//
// tasks: array of per-part results, each carrying its task_context.
// count: number of entries in `tasks`.
// extra: unused.
//
// Always returns 0.
int clean(d4_task_part_result_t* tasks, size_t count, void* extra)
{
    (void)extra;

    d4_task_part_result_t* cur = tasks;
    size_t remaining = count;

    while(remaining > 0)
    {
        free(cur->task_context);
        cur++;
        remaining--;
    }

    return 0;
}

// Loads every value of one chromosome into a newly allocated array, using the
// task API so that parts are read in parallel.
//
// fp:    an opened D4 file.
// chrom: name of the chromosome to load.
//
// Returns an array with one int per position of the chromosome; the caller
// owns it and must free() it. Returns NULL when an argument is NULL, the
// chromosome is not listed in the file, the allocation fails, or the task
// reports failure.
int* parallel_load_chromosome(d4_file_t* fp, char const* chrom)
{
    if(fp == NULL || chrom == NULL) return NULL;

    d4_file_metadata_t hdr = {0};
    d4_file_load_metadata(fp, &hdr);

    // Look up the size of the requested chromosome.
    size_t chrom_size = 0;
    int found = 0;
    size_t i;
    for(i = 0; i < (size_t)hdr.chrom_count; i++)
    {
        if(strcmp(hdr.chrom_name[i], chrom) == 0)
        {
            chrom_size = hdr.chrom_size[i];
            found = 1;
            break;
        }
    }

    // The metadata owns internal allocations that must be released; nothing
    // below refers to `hdr` any more (req.chrom points at the caller's string).
    d4_file_metadata_clear(&hdr);

    if(!found) return NULL;

    // calloc checks the size multiplication for overflow and zero-fills, so
    // positions that no part writes are 0 rather than indeterminate.
    int* buffer = (int*)calloc(chrom_size, sizeof(int));
    if(buffer == NULL) return NULL;

    load_request_t req = {
        .chrom = chrom,
        .start = 0,
        .end = (uint32_t)chrom_size,
        .buffer = buffer
    };

    d4_task_desc_t task = {
        .mode = D4_TASK_READ,
        .part_size_limit = 1000000,
        .num_cpus = 8,
        .part_context_create_cb = init,
        .part_process_cb = proc,
        .part_finalize_cb = clean,
        .extra_data = &req
    };

    // NOTE(review): a negative return is assumed to signal failure; confirm
    // against the d4_file_run_task contract.
    if(d4_file_run_task(fp, &task) < 0)
    {
        free(buffer);
        return NULL;
    }

    return buffer;
}



// Entry point: loads one chromosome of a D4 file in parallel.
//
// Usage: <program> <input.d4> <chr-name>
//
// Returns 0 on success and 1 on a usage error, when the file cannot be
// opened, or when the chromosome cannot be loaded.
int main(int argc, char** argv)
{
    if(argc != 3) {
        printf("Usage: %s <input.d4> <chr-name>\n", argv[0]);
        return 1;
    }

    d4_file_t* fp = d4_open(argv[1], "r");
    // NOTE(review): d4_open is assumed to return NULL on failure.
    if(fp == NULL) {
        fprintf(stderr, "Failed to open %s\n", argv[1]);
        return 1;
    }

    int status = 0;
    int* data = parallel_load_chromosome(fp, argv[2]);
    if(data == NULL) {
        fprintf(stderr, "Failed to load chromosome %s from %s\n", argv[2], argv[1]);
        status = 1;
    }

    // The example only demonstrates loading; the data is released right away.
    free(data);

    d4_close(fp);
    return status;
}
30 changes: 22 additions & 8 deletions d4binding/example/read.c
Original file line number Diff line number Diff line change
@@ -1,41 +1,55 @@
// This example demonstrates how to read a D4 file

#include <stdio.h>
#include <d4.h>

int main(int argc, char** argv)
{
if(argc != 2) {
printf("Usage: %s <input.d4>\n", argv[0]);
printf("Usage: %s <Path/URL>\n", argv[0]);
return 1;
}


// Open a D4 file
d4_file_t* fp = d4_open(argv[1], "r");

// Read the metadata living in this file.
// Note: to avoid a memory leak, you must call d4_file_metadata_clear to release
// all the internal memory allocated to hold the metadata.
d4_file_metadata_t mt = {};
d4_file_load_metadata(fp, &mt);

// Print out information of each chromosome
int i;
for(i = 0; i < mt.chrom_count; i ++)
printf("# %s %d\n", mt.chrom_name[i], mt.chrom_size[i]);


// Release the memory allocated for metadata
d4_file_metadata_clear(&mt);

for(;;) {

int data[20000];

char chr[20];

uint32_t pos;

d4_file_tell(fp, chr, 20, &pos);

ssize_t count = d4_file_read_values(fp, data, 20000);

// Get the current cursor location
d4_file_tell(fp, chr, sizeof(chr), &pos);

// Read the values from the file
ssize_t count = d4_file_read_values(fp, data, sizeof(data) / sizeof(data[0]));

// A count of zero or less means we have reached the end of the file or hit an error.
// To tell the two apart, call d4_error_num() and check whether an error code is set.
if(count <= 0) break;

// Print out the value one by one
for(i = 0; i < count; i ++)
printf("%s %d %d\n", chr, pos + i, data[i]);
}

// Close the D4 file and release all the memory allocated for reading this file
d4_close(fp);
return 0;
}

0 comments on commit 7f96bf1

Please sign in to comment.