author | Don Pellegrino <don@drexel.edu> | 2010-01-23 23:00:22 (GMT) |
---|---|---|
committer | Don Pellegrino <don@drexel.edu> | 2010-01-23 23:00:22 (GMT) |
commit | d2dda50ed620d93cb1c3c9705b3379c3507b8d9c (patch) (unidiff) | |
tree | 40df511a81bd346cbba2f7c1d753e36249e81f73 | |
parent | 6bfe5755d6a9b3d88032287e97681c65b7f32f0f (diff) | |
download | exp007-d2dda50ed620d93cb1c3c9705b3379c3507b8d9c.zip exp007-d2dda50ed620d93cb1c3c9705b3379c3507b8d9c.tar.gz exp007-d2dda50ed620d93cb1c3c9705b3379c3507b8d9c.tar.bz2 |
Routines to load the results of a BLAST run into the HDF5 file.
-rw-r--r-- | src/load/load_blast_scores.c | 119 | ||||
-rw-r--r-- | src/load/load_blast_scores.h | 11 | ||||
-rw-r--r-- | src/model/blast_scores_data.h | 21 | ||||
-rw-r--r-- | src/model/blast_scores_data_init.c | 43 | ||||
-rw-r--r-- | src/model/blast_scores_data_init.h | 14 |
5 files changed, 208 insertions, 0 deletions
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c new file mode 100644 index 0000000..42e6bd9 --- a/dev/null +++ b/src/load/load_blast_scores.c | |||
@@ -0,0 +1,119 @@ | |||
1 | #include "error/check_error.h" | ||
2 | #include "error/check_h5_error.h" | ||
3 | #include "model/blast_scores_data.h" | ||
4 | #include "model/blast_scores_data_init.h" | ||
5 | #include "load_blast_scores.h" | ||
6 | #include <hdf5_hl.h> | ||
7 | #include <string.h> | ||
8 | #include <stdlib.h> | ||
9 | |||
10 | void | ||
11 | load_blast_scores (hid_t file_id, const char *file_name) | ||
12 | { | ||
13 | size_t dst_size; | ||
14 | size_t dst_offset[BLAST_SCORES_DATA_FIELD_NUM]; | ||
15 | size_t dst_sizes[BLAST_SCORES_DATA_FIELD_NUM]; | ||
16 | hid_t field_type[BLAST_SCORES_DATA_FIELD_NUM]; | ||
17 | |||
18 | blast_scores_data_init (&dst_size, dst_offset, dst_sizes, field_type); | ||
19 | |||
20 | hsize_t chunk_size = 10; | ||
21 | int *fill_data = NULL; | ||
22 | int compress = 0; | ||
23 | |||
24 | blast_scores_data p_data[1000]; | ||
25 | FILE *dat = fopen (file_name, "r"); | ||
26 | if (dat == NULL) | ||
27 | check_error (__FILE__, __LINE__); | ||
28 | char *line = NULL; | ||
29 | size_t len = 0; | ||
30 | int current_line = 0; | ||
31 | int i = -1; | ||
32 | |||
33 | while (getline (&line, &len, dat) != -1) | ||
34 | { | ||
35 | current_line++; | ||
36 | i++; | ||
37 | |||
38 | char *running = strdup (line); | ||
39 | char *token = NULL; | ||
40 | |||
41 | token = strsep (&running, ","); | ||
42 | p_data[i].source_gi = atoi (&token[4]); | ||
43 | |||
44 | token = strsep (&running, ","); | ||
45 | p_data[i].source_start = atoi (token); | ||
46 | |||
47 | token = strsep (&running, ","); | ||
48 | p_data[i].source_end = atoi (token); | ||
49 | |||
50 | token = strsep (&running, ","); | ||
51 | p_data[i].target_gi = atoi (&token[4]); | ||
52 | |||
53 | token = strsep (&running, ","); | ||
54 | p_data[i].target_start = atoi (token); | ||
55 | |||
56 | token = strsep (&running, ","); | ||
57 | p_data[i].target_end = atoi (token); | ||
58 | |||
59 | token = strsep (&running, ","); | ||
60 | p_data[i].score = atoi (token); | ||
61 | |||
62 | token = strsep (&running, ","); | ||
63 | p_data[i].bit_score = strtod (token, NULL); | ||
64 | |||
65 | token = strsep (&running, ","); | ||
66 | p_data[i].evalue = strtod (token, NULL); | ||
67 | |||
68 | if (current_line == 1) | ||
69 | { | ||
70 | |||
71 | const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] = | ||
72 | BLAST_SCORES_DATA_FIELD_NAMES; | ||
73 | |||
74 | herr_t status = H5TBmake_table ("blast", file_id, | ||
75 | "blast", | ||
76 | BLAST_SCORES_DATA_FIELD_NUM, 1, | ||
77 | dst_size, | ||
78 | blast_scores_data_field_names, | ||
79 | dst_offset, field_type, | ||
80 | chunk_size, fill_data, | ||
81 | compress, | ||
82 | &p_data); | ||
83 | |||
84 | if (status < 0) | ||
85 | check_h5_error (__FILE__, __LINE__); | ||
86 | |||
87 | } | ||
88 | |||
89 | if ((i % 1000 == 0) && (i > 0)) | ||
90 | { | ||
91 | |||
92 | herr_t status = | ||
93 | H5TBappend_records (file_id, "blast", 1000, | ||
94 | dst_size, dst_offset, dst_sizes, | ||
95 | &p_data[0]); | ||
96 | if(status < 0) | ||
97 | check_h5_error (__FILE__, __LINE__); | ||
98 | |||
99 | status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); | ||
100 | if (status < 0) | ||
101 | check_h5_error (__FILE__, __LINE__); | ||
102 | |||
103 | printf ("Processed %i of records.\n", current_line); | ||
104 | |||
105 | i = -1; | ||
106 | } | ||
107 | |||
108 | if (running) | ||
109 | free (running); | ||
110 | |||
111 | } // End for each line of the input file. | ||
112 | |||
113 | if (line) | ||
114 | free (line); | ||
115 | |||
116 | fclose (dat); | ||
117 | |||
118 | return; | ||
119 | } | ||
diff --git a/src/load/load_blast_scores.h b/src/load/load_blast_scores.h new file mode 100644 index 0000000..e41968d --- a/dev/null +++ b/src/load/load_blast_scores.h | |||
@@ -0,0 +1,11 @@ | |||
1 | #ifndef LOAD_BLAST_SCORES_H | ||
2 | #define LOAD_BLAST_SCORES_H | ||
3 | |||
4 | #include <hdf5.h> | ||
5 | |||
6 | /* | ||
7 | * Load the results of a BLAST run into the HDF5 container. | ||
8 | */ | ||
9 | void load_blast_scores (hid_t file_id, const char *file_name); | ||
10 | |||
11 | #endif // LOAD_BLAST_SCORES_H | ||
diff --git a/src/model/blast_scores_data.h b/src/model/blast_scores_data.h new file mode 100644 index 0000000..8c4aaef --- a/dev/null +++ b/src/model/blast_scores_data.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef BLAST_SCORES_DATA_H | ||
2 | #define BLAST_SCORES_DATA_H | ||
3 | |||
4 | #define BLAST_SCORES_DATA_FIELD_NUM 9 | ||
5 | |||
6 | #define BLAST_SCORES_DATA_FIELD_NAMES { "Source GI", "Source Start", "Source End", "Target GI", "Target Start", "Target End", "Score", "Bit Score", "Evalue" } | ||
7 | |||
/*
 * One row of BLAST results.  Member order matches the column labels in
 * BLAST_SCORES_DATA_FIELD_NAMES: "Source GI", "Source Start",
 * "Source End", "Target GI", "Target Start", "Target End", "Score",
 * "Bit Score", "Evalue".
 */
typedef struct
{
  /* Source columns: GI, start, end. */
  int source_gi, source_start, source_end;
  /* Target columns: GI, start, end. */
  int target_gi, target_start, target_end;
  /* "Score" column. */
  int score;
  /* "Bit Score" and "Evalue" columns. */
  double bit_score, evalue;
} blast_scores_data;
20 | |||
21 | #endif // BLAST_SCORES_DATA_H | ||
diff --git a/src/model/blast_scores_data_init.c b/src/model/blast_scores_data_init.c new file mode 100644 index 0000000..14fa6f0 --- a/dev/null +++ b/src/model/blast_scores_data_init.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include "blast_scores_data_init.h" | ||
2 | #include "blast_scores_data.h" | ||
3 | |||
4 | void | ||
5 | blast_scores_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | ||
6 | hid_t *field_type) | ||
7 | { | ||
8 | *dst_size = sizeof (blast_scores_data); | ||
9 | |||
10 | dst_offset[0] = HOFFSET (blast_scores_data, source_gi); | ||
11 | dst_offset[1] = HOFFSET (blast_scores_data, source_start); | ||
12 | dst_offset[2] = HOFFSET (blast_scores_data, source_end); | ||
13 | dst_offset[3] = HOFFSET (blast_scores_data, target_gi); | ||
14 | dst_offset[4] = HOFFSET (blast_scores_data, target_start); | ||
15 | dst_offset[5] = HOFFSET (blast_scores_data, target_end); | ||
16 | dst_offset[6] = HOFFSET (blast_scores_data, score); | ||
17 | dst_offset[7] = HOFFSET (blast_scores_data, bit_score); | ||
18 | dst_offset[8] = HOFFSET (blast_scores_data, evalue); | ||
19 | |||
20 | blast_scores_data dst_buf[1]; | ||
21 | |||
22 | dst_sizes[0] = sizeof (dst_buf[0].source_gi); | ||
23 | dst_sizes[1] = sizeof (dst_buf[0].source_start); | ||
24 | dst_sizes[2] = sizeof (dst_buf[0].source_end); | ||
25 | dst_sizes[3] = sizeof (dst_buf[0].target_gi); | ||
26 | dst_sizes[4] = sizeof (dst_buf[0].target_start); | ||
27 | dst_sizes[5] = sizeof (dst_buf[0].target_end); | ||
28 | dst_sizes[6] = sizeof (dst_buf[0].score); | ||
29 | dst_sizes[7] = sizeof (dst_buf[0].bit_score); | ||
30 | dst_sizes[8] = sizeof (dst_buf[0].evalue); | ||
31 | |||
32 | field_type[0] = H5T_NATIVE_INT; | ||
33 | field_type[1] = H5T_NATIVE_INT; | ||
34 | field_type[2] = H5T_NATIVE_INT; | ||
35 | field_type[3] = H5T_NATIVE_INT; | ||
36 | field_type[4] = H5T_NATIVE_INT; | ||
37 | field_type[5] = H5T_NATIVE_INT; | ||
38 | field_type[6] = H5T_NATIVE_INT; | ||
39 | field_type[7] = H5T_NATIVE_DOUBLE; | ||
40 | field_type[8] = H5T_NATIVE_DOUBLE; | ||
41 | |||
42 | return; | ||
43 | } | ||
diff --git a/src/model/blast_scores_data_init.h b/src/model/blast_scores_data_init.h new file mode 100644 index 0000000..cae6edd --- a/dev/null +++ b/src/model/blast_scores_data_init.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef BLAST_SCORES_DATA_INIT_H | ||
2 | #define BLAST_SCORES_DATA_INIT_H | ||
3 | |||
4 | #include <hdf5.h> | ||
5 | |||
6 | /* | ||
7 | * Initialize the structures describing the struct. These descriptive | ||
8 | * structures are used by the HDF5 API. | ||
9 | */ | ||
10 | void | ||
11 | blast_scores_data_init (size_t *dst_size, size_t *dst_offset, size_t *dst_sizes, | ||
12 | hid_t *field_type); | ||
13 | |||
14 | #endif // BLAST_SCORES_DATA_INIT_H | ||