-rw-r--r-- | src/aggregator.c | 14 | ||||
-rw-r--r-- | src/check_ncbi_error.c | 3 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 2 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 115 |
4 files changed, 126 insertions, 8 deletions
diff --git a/src/aggregator.c b/src/aggregator.c index 5fb9d4a..36ea18c 100644 --- a/src/aggregator.c +++ b/src/aggregator.c | |||
@@ -16,26 +16,26 @@ main () | |||
16 | /* | 16 | /* |
17 | * Create the HDF5 file. | 17 | * Create the HDF5 file. |
18 | */ | 18 | */ |
19 | // hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); | 19 | hid_t file_id = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Load the supplementary protein data file. | 22 | * Load the supplementary protein data file. |
23 | */ | 23 | */ |
24 | // load_influenza_aa_dat (file_id); | 24 | load_influenza_aa_dat (file_id); |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Load the FASTA protein sequence data file. | 27 | * Load the FASTA protein sequence data file. |
28 | */ | 28 | */ |
29 | // load_influenza_faa (file_id); | 29 | load_influenza_faa (file_id); |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Close the HDF5 file. | 32 | * Close the HDF5 file. |
33 | */ | 33 | */ |
34 | // herr_t status = H5Fclose (file_id); | 34 | herr_t status = H5Fclose (file_id); |
35 | // if (status < 0) | 35 | if (status < 0) |
36 | // check_h5_error (status, __FILE__, __LINE__); | 36 | check_h5_error (status, __FILE__, __LINE__); |
37 | 37 | ||
38 | assign_protein_type (0); | 38 | // assign_protein_type (0); |
39 | 39 | ||
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
diff --git a/src/check_ncbi_error.c b/src/check_ncbi_error.c index 8e1c3b2..6071d1a 100644 --- a/src/check_ncbi_error.c +++ b/src/check_ncbi_error.c | |||
@@ -1,4 +1,7 @@ | |||
1 | #include "check_ncbi_error.h" | 1 | #include "check_ncbi_error.h" |
2 | #include <error.h> | ||
3 | #include <stdlib.h> | ||
4 | #include <blast.h> | ||
2 | 5 | ||
3 | void | 6 | void |
4 | check_ncbi_error (ValNodePtr error_returns, | 7 | check_ncbi_error (ValNodePtr error_returns, |
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c index 9ee3c46..f0d9ee5 100644 --- a/src/load_influenza_aa_dat.c +++ b/src/load_influenza_aa_dat.c | |||
@@ -158,7 +158,7 @@ load_influenza_aa_dat (hid_t file_id) | |||
158 | 158 | ||
159 | current_line++; | 159 | current_line++; |
160 | char *running = strdup (line); | 160 | char *running = strdup (line); |
161 | char *token; | 161 | char *token = NULL; |
162 | 162 | ||
163 | /* | 163 | /* |
164 | * Parse the line, handling the case of empty fields represented | 164 | * Parse the line, handling the case of empty fields represented |
diff --git a/src/load_influenza_faa.c b/src/load_influenza_faa.c index 8fd0cd7..61bb99d 100644 --- a/src/load_influenza_faa.c +++ b/src/load_influenza_faa.c | |||
@@ -1,9 +1,124 @@ | |||
1 | #include "load_influenza_faa.h" | 1 | #include "load_influenza_faa.h" |
2 | #include "check_error.h" | ||
3 | #include "check_h5_error.h" | ||
4 | #include "hdf5_hl.h" | ||
5 | #include <string.h> | ||
6 | #include <stdlib.h> | ||
2 | 7 | ||
3 | void | 8 | void |
4 | load_influenza_faa (hid_t file_id) | 9 | load_influenza_faa (hid_t file_id) |
5 | { | 10 | { |
11 | typedef struct | ||
12 | { | ||
13 | int gi; | ||
14 | char gb[9]; | ||
15 | char description[196]; | ||
16 | } sequence_data; | ||
6 | 17 | ||
18 | size_t dst_size = sizeof (sequence_data); | ||
19 | size_t dst_offset[3] = | ||
20 | { HOFFSET (sequence_data, gi), | ||
21 | HOFFSET (sequence_data, gb), | ||
22 | HOFFSET (sequence_data, description) | ||
23 | }; | ||
24 | |||
25 | sequence_data dst_buf[1]; | ||
26 | |||
27 | size_t dst_sizes[3] = { | ||
28 | sizeof (dst_buf[0].gi), | ||
29 | sizeof (dst_buf[0].gb), | ||
30 | sizeof (dst_buf[0].description) | ||
31 | }; | ||
32 | |||
33 | hid_t field_type[3]; | ||
34 | |||
35 | field_type[0] = H5T_NATIVE_INT; | ||
36 | |||
37 | hid_t gb_type = H5Tcopy (H5T_C_S1); | ||
38 | H5Tset_size (gb_type, 9); | ||
39 | field_type[1] = gb_type; | ||
40 | |||
41 | hid_t description_type = H5Tcopy (H5T_C_S1); | ||
42 | H5Tset_size (description_type, 196); | ||
43 | field_type[2] = description_type; | ||
44 | |||
45 | const char *field_names[3] = { "GI", | ||
46 | "GB", | ||
47 | "Description" }; | ||
48 | |||
49 | hsize_t chunk_size = 10; | ||
50 | int *fill_data = NULL; | ||
51 | int compress = 0; | ||
52 | |||
53 | sequence_data p_data; | ||
54 | FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza.faa", | ||
55 | "r"); | ||
56 | if (dat == NULL) | ||
57 | check_error (__FILE__, __LINE__); | ||
58 | char *line = NULL; | ||
59 | size_t len = 0; | ||
60 | int current_line = 0; | ||
61 | |||
62 | while (getline (&line, &len, dat) != -1) | ||
63 | { | ||
64 | current_line++; | ||
65 | |||
66 | // Header line. | ||
67 | if (line[0] == '>') | ||
68 | { | ||
69 | char *running = strdup (line); | ||
70 | char *token = NULL; | ||
71 | |||
72 | // Eat the ">gi". | ||
73 | strsep (&running, "|"); | ||
74 | |||
75 | // GI value. | ||
76 | token = strsep (&running, "|"); | ||
77 | p_data.gi = atoi (token); | ||
78 | |||
79 | // Eat the "gb" | ||
80 | strsep (&running, "|"); | ||
81 | |||
82 | // GB value. | ||
83 | strncpy (p_data.gb, strsep(&running, "|"), sizeof (p_data.gb)); | ||
84 | |||
85 | // Description value. | ||
86 | strncpy (p_data.description, strsep (&running, "|"), | ||
87 | sizeof (p_data.description)); | ||
88 | |||
89 | if (current_line == 1) | ||
90 | { | ||
91 | herr_t status = H5TBmake_table ("influenza.faa", file_id, | ||
92 | "influenza.faa", 3, 1, dst_size, | ||
93 | field_names, dst_offset, | ||
94 | field_type, chunk_size, | ||
95 | fill_data, compress, &p_data); | ||
96 | if (status < 0) | ||
97 | check_h5_error (status, __FILE__, __LINE__); | ||
98 | } | ||
99 | else | ||
100 | { | ||
101 | herr_t status = | ||
102 | H5TBappend_records (file_id, "influenza.faa", 1, dst_size, | ||
103 | dst_offset, dst_sizes, &p_data); | ||
104 | if (status < 0) | ||
105 | check_h5_error (status, __FILE__, __LINE__); | ||
106 | } | ||
107 | |||
108 | if (running) | ||
109 | free (running); | ||
110 | |||
111 | } | ||
112 | |||
113 | } | ||
114 | |||
115 | if (line) | ||
116 | free (line); | ||
117 | |||
118 | fclose (dat); | ||
119 | |||
120 | H5Tclose (gb_type); | ||
121 | H5Tclose (description_type); | ||
7 | 122 | ||
8 | return; | 123 | return; |
9 | } | 124 | } |