-rw-r--r-- | src/load/load_blast_scores.c | 119 | ||||
-rw-r--r-- | src/load/load_blast_scores.h | 11 | ||||
-rw-r--r-- | src/model/blast_scores_data.h | 21 | ||||
-rw-r--r-- | src/model/blast_scores_data_init.c | 43 | ||||
-rw-r--r-- | src/model/blast_scores_data_init.h | 14 |
5 files changed, 208 insertions, 0 deletions
diff --git a/src/load/load_blast_scores.c b/src/load/load_blast_scores.c new file mode 100644 index 0000000..42e6bd9 --- a/dev/null +++ b/src/load/load_blast_scores.c @@ -0,0 +1,119 @@ +#include "error/check_error.h" +#include "error/check_h5_error.h" +#include "model/blast_scores_data.h" +#include "model/blast_scores_data_init.h" +#include "load_blast_scores.h" +#include <hdf5_hl.h> +#include <string.h> +#include <stdlib.h> + +void +load_blast_scores (hid_t file_id, const char *file_name) +{ + size_t dst_size; + size_t dst_offset[BLAST_SCORES_DATA_FIELD_NUM]; + size_t dst_sizes[BLAST_SCORES_DATA_FIELD_NUM]; + hid_t field_type[BLAST_SCORES_DATA_FIELD_NUM]; + + blast_scores_data_init (&dst_size, dst_offset, dst_sizes, field_type); + + hsize_t chunk_size = 10; + int *fill_data = NULL; + int compress = 0; + + blast_scores_data p_data[1000]; + FILE *dat = fopen (file_name, "r"); + if (dat == NULL) + check_error (__FILE__, __LINE__); + char *line = NULL; + size_t len = 0; + int current_line = 0; + int i = -1; + + while (getline (&line, &len, dat) != -1) + { + current_line++; + i++; + + char *running = strdup (line); + char *token = NULL; + + token = strsep (&running, ","); + p_data[i].source_gi = atoi (&token[4]); + + token = strsep (&running, ","); + p_data[i].source_start = atoi (token); + + token = strsep (&running, ","); + p_data[i].source_end = atoi (token); + + token = strsep (&running, ","); + p_data[i].target_gi = atoi (&token[4]); + + token = strsep (&running, ","); + p_data[i].target_start = atoi (token); + + token = strsep (&running, ","); + p_data[i].target_end = atoi (token); + + token = strsep (&running, ","); + p_data[i].score = atoi (token); + + token = strsep (&running, ","); + p_data[i].bit_score = strtod (token, NULL); + + token = strsep (&running, ","); + p_data[i].evalue = strtod (token, NULL); + + if (current_line == 1) + { + + const char *blast_scores_data_field_names[BLAST_SCORES_DATA_FIELD_NUM] = + BLAST_SCORES_DATA_FIELD_NAMES; + + herr_t status = H5TBmake_table ("blast", file_id, + "blast", + BLAST_SCORES_DATA_FIELD_NUM, 1, + dst_size, + blast_scores_data_field_names, + dst_offset, field_type, + chunk_size, fill_data, + compress, + &p_data); + + if (status < 0) + check_h5_error (__FILE__, __LINE__); + + } + + if ((i % 1000 == 0) && (i > 0)) + { + + herr_t status = + H5TBappend_records (file_id, "blast", 1000, + dst_size, dst_offset, dst_sizes, + &p_data[0]); + if(status < 0) + check_h5_error (__FILE__, __LINE__); + + status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); + if (status < 0) + check_h5_error (__FILE__, __LINE__); + + printf ("Processed %i of records.\n", current_line); + + i = -1; + } + + if (running) + free (running); + + } // End for each line of the input file. + + if (line) + free (line); + + fclose (dat); + + return; +} |