-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | src/Makefile.am | 10 | ||||
-rw-r--r-- | src/assign_protein_type.c | 135 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 67 | ||||
-rw-r--r-- | src/sequence_data.h | 16 | ||||
-rw-r--r-- | src/sequence_data_init.c | 37 | ||||
-rw-r--r-- | src/sequence_data_init.h | 14 |
7 files changed, 188 insertions, 96 deletions
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c
index 749b7ad..fd35254 100644
--- a/src/load_influenza_faa.c
+++ b/src/load_influenza_faa.c
@@ -1,62 +1,22 @@ | |||
1 | #include "load_influenza_faa.h" | ||
2 | #include "check_error.h" | 1 | #include "check_error.h" |
3 | #include "check_h5_error.h" | 2 | #include "check_h5_error.h" |
4 | #include "hdf5_hl.h" | 3 | #include "load_influenza_faa.h" |
4 | #include "sequence_data.h" | ||
5 | #include "sequence_data_init.h" | ||
6 | #include <hdf5_hl.h> | ||
5 | #include <string.h> | 7 | #include <string.h> |
6 | #include <stdlib.h> | 8 | #include <stdlib.h> |
7 | 9 | ||
8 | #define SEQUENCE_DATA_FIELD_NUM 4 | ||
9 | |||
10 | void | 10 | void |
11 | load_influenza_faa (hid_t file_id) | 11 | load_influenza_faa (hid_t file_id) |
12 | { | 12 | { |
13 | typedef struct | 13 | size_t dst_size; |
14 | { | 14 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM]; |
15 | int gi; | 15 | size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM]; |
16 | char gb[9]; | ||
17 | char description[196]; | ||
18 | char protein_type[7]; | ||
19 | } sequence_data; | ||
20 | |||
21 | size_t dst_size = sizeof (sequence_data); | ||
22 | size_t dst_offset[SEQUENCE_DATA_FIELD_NUM] = | ||
23 | { HOFFSET (sequence_data, gi), | ||
24 | HOFFSET (sequence_data, gb), | ||
25 | HOFFSET (sequence_data, description), | ||
26 | HOFFSET (sequence_data, protein_type) | ||
27 | }; | ||
28 | |||
29 | sequence_data dst_buf[1]; | ||
30 | |||
31 | size_t dst_sizes[SEQUENCE_DATA_FIELD_NUM] = { | ||
32 | sizeof (dst_buf[0].gi), | ||
33 | sizeof (dst_buf[0].gb), | ||
34 | sizeof (dst_buf[0].description), | ||
35 | sizeof (dst_buf[0].protein_type) | ||
36 | }; | ||
37 | |||
38 | hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; | 16 | hid_t field_type[SEQUENCE_DATA_FIELD_NUM]; |
39 | 17 | ||
40 | field_type[0] = H5T_NATIVE_INT; | 18 | sequence_data_init (&dst_size, dst_offset, dst_sizes, field_type); |
41 | 19 | ||
42 | hid_t gb_type = H5Tcopy (H5T_C_S1); | ||
43 | H5Tset_size (gb_type, 9); | ||
44 | field_type[1] = gb_type; | ||
45 | |||
46 | hid_t description_type = H5Tcopy (H5T_C_S1); | ||
47 | H5Tset_size (description_type, 196); | ||
48 | field_type[2] = description_type; | ||
49 | |||
50 | hid_t protein_type_type = H5Tcopy (H5T_C_S1); | ||
51 | H5Tset_size (protein_type_type, 7); | ||
52 | field_type[3] = protein_type_type; | ||
53 | |||
54 | const char *field_names[SEQUENCE_DATA_FIELD_NUM] = | ||
55 | { "GI", | ||
56 | "GB", | ||
57 | "Description", | ||
58 | "Protein Type" }; | ||
59 | |||
60 | hsize_t chunk_size = 10; | 20 | hsize_t chunk_size = 10; |
61 | int *fill_data = NULL; | 21 | int *fill_data = NULL; |
62 | int compress = 0; | 22 | int compress = 0; |
@@ -99,12 +59,15 @@ load_influenza_faa (hid_t file_id) | |||
99 | 59 | ||
100 | strncpy (p_data.protein_type, "", sizeof (p_data.protein_type)); | 60 | strncpy (p_data.protein_type, "", sizeof (p_data.protein_type)); |
101 | 61 | ||
62 | const char* sequence_data_field_names[SEQUENCE_DATA_FIELD_NUM] = | ||
63 | SEQUENCE_DATA_FIELD_NAMES; | ||
64 | |||
102 | if (current_line == 1) | 65 | if (current_line == 1) |
103 | { | 66 | { |
104 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | 67 | herr_t status = H5TBmake_table ("influenza.faa", file_id, |
105 | "influenza.faa", | 68 | "influenza.faa", |
106 | SEQUENCE_DATA_FIELD_NUM, 1, | 69 | SEQUENCE_DATA_FIELD_NUM, 1, |
107 | dst_size, field_names, | 70 | dst_size, sequence_data_field_names, |
108 | dst_offset, field_type, | 71 | dst_offset, field_type, |
109 | chunk_size, fill_data, compress, | 72 | chunk_size, fill_data, compress, |
110 | &p_data); | 73 | &p_data); |
@@ -132,9 +95,5 @@ load_influenza_faa (hid_t file_id) | |||
132 | 95 | ||
133 | fclose (dat); | 96 | fclose (dat); |
134 | 97 | ||
135 | H5Tclose (gb_type); | ||
136 | H5Tclose (description_type); | ||
137 | H5Tclose (protein_type_type); | ||
138 | |||
139 | return; | 98 | return; |
140 | } | 99 | } |