-rw-r--r-- | src/aggregator.c | 2 | ||||
-rw-r--r-- | src/assign_protein_type.c | 7 | ||||
-rw-r--r-- | src/assign_protein_type.h | 3 | ||||
-rw-r--r-- | src/check_error.c | 2 | ||||
-rw-r--r-- | src/check_error.h | 3 | ||||
-rw-r--r-- | src/check_h5_error.c | 2 | ||||
-rw-r--r-- | src/check_h5_error.h | 2 | ||||
-rw-r--r-- | src/check_ncbi_error.c | 3 | ||||
-rw-r--r-- | src/check_ncbi_error.h | 2 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 259 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.h | 3 | ||||
-rw-r--r-- | src/load_influenza_faa.c | 2 | ||||
-rw-r--r-- | src/load_influenza_faa.h | 3 |
13 files changed, 145 insertions, 148 deletions
diff --git a/src/load_influenza_aa_dat.c b/src/load_influenza_aa_dat.c index 91ef415..9ee3c46 100644 --- a/src/load_influenza_aa_dat.c +++ b/src/load_influenza_aa_dat.c @@ -46,60 +46,63 @@ load_influenza_aa_dat (hid_t file_id) * memory." */ size_t dst_size = sizeof (supplementary_data); - size_t dst_offset[NFIELDS] = { HOFFSET ( supplementary_data, genbank_accession_number ), - HOFFSET ( supplementary_data, host ), - HOFFSET ( supplementary_data, genome_segment_number ), - HOFFSET ( supplementary_data, subtype ), - HOFFSET ( supplementary_data, country ), - HOFFSET ( supplementary_data, year ), - HOFFSET ( supplementary_data, sequence_length ), - HOFFSET ( supplementary_data, virus_name ), - HOFFSET ( supplementary_data, age ), - HOFFSET ( supplementary_data, gender ), - HOFFSET ( supplementary_data, full_length_indicator )}; + size_t dst_offset[NFIELDS] = + { HOFFSET (supplementary_data, genbank_accession_number), + HOFFSET (supplementary_data, host), + HOFFSET (supplementary_data, genome_segment_number), + HOFFSET (supplementary_data, subtype), + HOFFSET (supplementary_data, country), + HOFFSET (supplementary_data, year), + HOFFSET (supplementary_data, sequence_length), + HOFFSET (supplementary_data, virus_name), + HOFFSET (supplementary_data, age), + HOFFSET (supplementary_data, gender), + HOFFSET (supplementary_data, full_length_indicator) + }; supplementary_data dst_buf[1]; - size_t dst_sizes[NFIELDS] = { sizeof ( dst_buf[0].genbank_accession_number ), - sizeof ( dst_buf[0].host ), - sizeof ( dst_buf[0].genome_segment_number ), - sizeof ( dst_buf[0].subtype ), - sizeof ( dst_buf[0].country ), - sizeof ( dst_buf[0].year ), - sizeof ( dst_buf[0].sequence_length ), - sizeof ( dst_buf[0].virus_name ), - sizeof ( dst_buf[0].age ), - sizeof ( dst_buf[0].gender ), - sizeof ( dst_buf[0].full_length_indicator)}; + size_t dst_sizes[NFIELDS] = { sizeof (dst_buf[0].genbank_accession_number), + sizeof (dst_buf[0].host), + sizeof (dst_buf[0].genome_segment_number), + sizeof (dst_buf[0].subtype), + sizeof (dst_buf[0].country), + sizeof (dst_buf[0].year), + sizeof (dst_buf[0].sequence_length), + sizeof (dst_buf[0].virus_name), + sizeof (dst_buf[0].age), + sizeof (dst_buf[0].gender), + sizeof (dst_buf[0].full_length_indicator) + }; /* * Map the native types to HDF5 types for each field. */ hid_t field_type[NFIELDS]; - hid_t genbank_accession_number_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size ( genbank_accession_number_type, 9 ); + hid_t genbank_accession_number_type = H5Tcopy (H5T_C_S1); + H5Tset_size (genbank_accession_number_type, 9); field_type[0] = genbank_accession_number_type; - hid_t host_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size ( host_type, 15 ); + hid_t host_type = H5Tcopy (H5T_C_S1); + H5Tset_size (host_type, 15); field_type[1] = host_type; - + field_type[2] = H5T_NATIVE_INT; - hid_t subtype_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size (subtype_type, 7 ); + hid_t subtype_type = H5Tcopy (H5T_C_S1); + H5Tset_size (subtype_type, 7); field_type[3] = subtype_type; - hid_t country_type = H5Tcopy ( H5T_C_S1 ); - H5Tset_size (country_type, 25 ); + hid_t country_type = H5Tcopy (H5T_C_S1); + H5Tset_size (country_type, 25); field_type[4] = country_type; field_type[5] = H5T_NATIVE_INT; field_type[6] = H5T_NATIVE_INT; - hid_t virus_name_type = H5Tcopy ( H5T_C_S1 ); + hid_t virus_name_type = H5Tcopy (H5T_C_S1); H5Tset_size (virus_name_type, 196); field_type[7] = virus_name_type; @@ -118,18 +121,18 @@ load_influenza_aa_dat (hid_t file_id) /* * Labels used for the fields in the table. */ - const char *field_names[NFIELDS] = - { "GenBank accession number", - "Host", - "Genome segment number", - "Subtype", - "Country", - "Year", - "Sequence length", - "Virus name", - "Age", - "Gender", - "Full-length Indicator" }; + const char *field_names[NFIELDS] = { "GenBank accession number", + "Host", + "Genome segment number", + "Subtype", + "Country", + "Year", + "Sequence length", + "Virus name", + "Age", + "Gender", + "Full-length Indicator" + }; /* * Table storage options. @@ -142,7 +145,7 @@ load_influenza_aa_dat (hid_t file_id) * Insert the records. */ supplementary_data p_data; - FILE* dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", + FILE *dat = fopen ("/home/don/exp004/genomes/INFLUENZA/influenza_aa.dat", "r"); if (dat == NULL) check_error (__FILE__, __LINE__); @@ -150,89 +153,89 @@ load_influenza_aa_dat (hid_t file_id) size_t len = 0; int current_line = 0; - while (getline (&line, &len, dat) != -1) { - - current_line++; - char *running = strdup (line); - char *token; - - /* - * Parse the line, handling the case of empty fields represented - * by sequential delimiters. - */ - strncpy(p_data.genbank_accession_number, strsep (&running, "\t"), - sizeof(p_data.genbank_accession_number)); - - strncpy(p_data.host, strsep (&running, "\t"), - sizeof(p_data.host)); - - token = strsep (&running, "\t"); - if (strcmp (token, "\0") == 0) - p_data.genome_segment_number = 0; - else - p_data.genome_segment_number = atoi(token); - - strncpy(p_data.subtype, strsep (&running, "\t"), - sizeof(p_data.subtype)); - - strncpy(p_data.country, strsep (&running, "\t"), - sizeof(p_data.country)); - - /* - * Convert the year field from text to numeric. Unknown and empty - * values are assigned a numeric value of zero. - */ - token = strsep (&running, "\t"); - if (strcmp (token, "\0") == 0) - p_data.year = 0; - else if (strcmp (token, "unknown") == 0) - p_data.year = 0; - else if (strcmp (token, "NON") == 0) - p_data.year = 0; - else - p_data.year = atoi(token); - - token = strsep (&running, "\t"); - if (strcmp (token, "\0") == 0) - p_data.sequence_length = 0; - else - p_data.sequence_length = atoi(token); - - strncpy(p_data.virus_name, strsep (&running, "\t"), - sizeof(p_data.virus_name)); - - strncpy(p_data.age, strsep (&running, "\t"), - sizeof(p_data.age)); - - strncpy(p_data.gender, strsep (&running, "\t"), - sizeof(p_data.gender)); - - strncpy(p_data.full_length_indicator, strsep (&running, "\t"), - sizeof(p_data.full_length_indicator)); - - if (current_line == 1) - { - herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, - TABLE_NAME, NFIELDS, 1, dst_size, - field_names, dst_offset, field_type, - chunk_size, fill_data, compress, - &p_data); - if (status < 0) - check_h5_error (status, __FILE__, __LINE__); - } - else - { - herr_t status = H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, - dst_offset, dst_sizes, &p_data); - if (status < 0) - check_h5_error (status, __FILE__, __LINE__); - } - - if (running) - free (running); - - } - + while (getline (&line, &len, dat) != -1) + { + + current_line++; + char *running = strdup (line); + char *token; + + /* + * Parse the line, handling the case of empty fields represented + * by sequential delimiters. + */ + strncpy (p_data.genbank_accession_number, strsep (&running, "\t"), + sizeof (p_data.genbank_accession_number)); + + strncpy (p_data.host, strsep (&running, "\t"), sizeof (p_data.host)); + + token = strsep (&running, "\t"); + if (strcmp (token, "\0") == 0) + p_data.genome_segment_number = 0; + else + p_data.genome_segment_number = atoi (token); + + strncpy (p_data.subtype, strsep (&running, "\t"), + sizeof (p_data.subtype)); + + strncpy (p_data.country, strsep (&running, "\t"), + sizeof (p_data.country)); + + /* + * Convert the year field from text to numeric. Unknown and empty + * values are assigned a numeric value of zero. + */ + token = strsep (&running, "\t"); + if (strcmp (token, "\0") == 0) + p_data.year = 0; + else if (strcmp (token, "unknown") == 0) + p_data.year = 0; + else if (strcmp (token, "NON") == 0) + p_data.year = 0; + else + p_data.year = atoi (token); + + token = strsep (&running, "\t"); + if (strcmp (token, "\0") == 0) + p_data.sequence_length = 0; + else + p_data.sequence_length = atoi (token); + + strncpy (p_data.virus_name, strsep (&running, "\t"), + sizeof (p_data.virus_name)); + + strncpy (p_data.age, strsep (&running, "\t"), sizeof (p_data.age)); + + strncpy (p_data.gender, strsep (&running, "\t"), + sizeof (p_data.gender)); + + strncpy (p_data.full_length_indicator, strsep (&running, "\t"), + sizeof (p_data.full_length_indicator)); + + if (current_line == 1) + { + herr_t status = H5TBmake_table ("influenza_aa.dat", file_id, + TABLE_NAME, NFIELDS, 1, dst_size, + field_names, dst_offset, field_type, + chunk_size, fill_data, compress, + &p_data); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + } + else + { + herr_t status = + H5TBappend_records (file_id, TABLE_NAME, 1, dst_size, + dst_offset, dst_sizes, &p_data); + if (status < 0) + check_h5_error (status, __FILE__, __LINE__); + } + + if (running) + free (running); + + } + if (line) free (line); |