-rw-r--r-- | src/aggregator.c | 3 | ||||
-rw-r--r-- | src/assign/assign_protein_type.c | 110 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.c | 13 | ||||
-rw-r--r-- | src/load/load_influenza_aa_dat.h | 2 | ||||
-rw-r--r-- | src/load/load_influenza_faa.c | 30 | ||||
-rw-r--r-- | src/load/load_influenza_faa.h | 2 | ||||
-rw-r--r-- | src/model/gi_type_data_init.c | 4 | ||||
-rw-r--r-- | src/model/gi_type_data_init.h | 4 | ||||
-rw-r--r-- | src/model/sequence_data_init.c | 18 | ||||
-rw-r--r-- | src/model/sequence_data_init.h | 6 |
10 files changed, 99 insertions, 93 deletions
diff --git a/src/assign/assign_protein_type.c b/src/assign/assign_protein_type.c index 9a0717b..1df4c8d 100644 --- a/src/assign/assign_protein_type.c +++ b/src/assign/assign_protein_type.c @@ -70,7 +70,7 @@ assign_protein_type (hid_t file_id) if (status < 0) check_h5_error (status, __FILE__, __LINE__); - sequence_data* faa_buf = malloc (sizeof(sequence_data) * faa_nrecords); + sequence_data *faa_buf = malloc (sizeof (sequence_data) * faa_nrecords); size_t faa_size; size_t faa_offset[SEQUENCE_DATA_FIELD_NUM]; @@ -86,7 +86,7 @@ assign_protein_type (hid_t file_id) /* * Allocate memory for the new table. */ - gi_type_data* new_buf = malloc (sizeof (gi_type_data) * faa_nrecords); + gi_type_data *new_buf = malloc (sizeof (gi_type_data) * faa_nrecords); if (new_buf == NULL) check_error (__FILE__, __LINE__); @@ -101,7 +101,7 @@ assign_protein_type (hid_t file_id) hid_t gi_field_type[GI_TYPE_DATA_FIELD_NUM]; gi_type_data_init (&gi_size, gi_offset, gi_sizes, gi_field_type); - gi_type_data* old_buf = NULL; + gi_type_data *old_buf = NULL; /* * If the table is already present read the values into memory and @@ -117,10 +117,11 @@ assign_protein_type (hid_t file_id) if (status < 0) check_h5_error (status, __FILE__, __LINE__); - printf (" Using gi_type_data cache of %i records.\n", (int)gi_nrecords); - - old_buf = malloc (sizeof(gi_type_data) * gi_nrecords); - + printf (" Using gi_type_data cache of %i records.\n", + (int) gi_nrecords); + + old_buf = malloc (sizeof (gi_type_data) * gi_nrecords); + status = H5TBread_table (file_id, "gi_type_data", gi_size, gi_offset, gi_sizes, old_buf); if (status < 0) @@ -129,18 +130,18 @@ assign_protein_type (hid_t file_id) status = H5TBdelete_record (file_id, "gi_type_data", 0, gi_nrecords); if (status < 0) check_h5_error (status, __FILE__, __LINE__); - + } /* * If the table is not already present create it. */ else - { + { printf ("Creating gi_type_data.\n"); - const char* gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = + const char *gi_type_data_field_names[GI_TYPE_DATA_FIELD_NUM] = GI_TYPE_DATA_FIELD_NAMES; hsize_t chunk_size = 10; @@ -152,8 +153,7 @@ assign_protein_type (hid_t file_id) GI_TYPE_DATA_FIELD_NUM, 0, gi_size, gi_type_data_field_names, gi_offset, gi_field_type, - chunk_size, fill_data, compress, - NULL); + chunk_size, fill_data, compress, NULL); if (status < 0) check_h5_error (status, __FILE__, __LINE__); @@ -169,7 +169,7 @@ assign_protein_type (hid_t file_id) "Allocation of cache failed."); ENTRY e, *ep; - for (int i = 0; i < (int)gi_nrecords; i++) + for (int i = 0; i < (int) gi_nrecords; i++) { char gi_chr[25]; snprintf (gi_chr, 25, "%i", old_buf[i].gi); @@ -183,14 +183,14 @@ assign_protein_type (hid_t file_id) /* * Assign protein types to records for which the field is empty. */ - printf ("Records to process: %i\n", (int)faa_nrecords); + printf ("Records to process: %i\n", (int) faa_nrecords); int written = 0; - for (int i = 0; i < (int)faa_nrecords; i++) + for (int i = 0; i < (int) faa_nrecords; i++) { new_buf[i].gi = faa_buf[i].gi; strncpy (new_buf[i].type, "", sizeof (new_buf[i].type)); strncpy (new_buf[i].protein, "", sizeof (new_buf[i].protein)); - + char gi_chr[25]; snprintf (gi_chr, 25, "%i", faa_buf[i].gi); e.key = gi_chr; @@ -199,24 +199,23 @@ assign_protein_type (hid_t file_id) /* * A record was not found in the cache for this gi. */ - if (hsearch_r (e, FIND, &ep, &htab) == 0) + if (hsearch_r (e, FIND, &ep, &htab) == 0) { - + /* * Read the sequence from the database by GI. */ Int4 sequence_number = readdb_gi2seq (seqdb, faa_buf[i].gi, NULL); BioseqPtr bsp = readdb_get_bioseq (seqdb, sequence_number); - if (bsp == NULL) + if (bsp == NULL) { error_at_line (EXIT_FAILURE, 0, __FILE__, __LINE__, "Unable to find BLAST record for gi|%i. Ensure " "the BLAST database is up-to-date with the HDF5 " "record set. See the BLAST formatdb.log file " - "for details.\n", - faa_buf[i].gi); + "for details.\n", faa_buf[i].gi); } - + SeqAlignPtr seqalign = BioseqBlastEngine (bsp, "blastp", REFDB, @@ -224,19 +223,19 @@ assign_protein_type (hid_t file_id) NULL, &error_returns, NULL); - + /* * BLAST reported an error. Write it out and continue processing. */ if (error_returns != NULL) { CharPtr msg = BlastErrorToString (error_returns); - printf ("Warning: An error has been reported by the NCBI Toolkit " - "API for sequence gi|%i: %s", - faa_buf[i].gi, msg); - free (msg); + printf + ("Warning: An error has been reported by the NCBI Toolkit " + "API for sequence gi|%i: %s", faa_buf[i].gi, msg); + free (msg); } - + /* * A hit was found. Record the first hit as the protein type. * Skip the first 4 characters and eat the "lcl|". @@ -245,18 +244,18 @@ assign_protein_type (hid_t file_id) { Char target_id_buf[BUFFER_LEN + 1]; SeqIdPtr target_id = SeqAlignId (seqalign, 1); - SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, + SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); // Species Type new_buf[i].type[0] = target_id_buf[4]; new_buf[i].type[1] = '\0'; - + // Protein Type (Skip the underscore in the string). - strncpy (new_buf[i].protein, &target_id_buf[6], - sizeof (new_buf[i].protein)); + strncpy (new_buf[i].protein, &target_id_buf[6], + sizeof (new_buf[i].protein)); } - + /* * BLAST did not find any hits. */ @@ -265,73 +264,74 @@ assign_protein_type (hid_t file_id) printf ("Warning: Unable to identify protein type for sequence " "gi|%i\n", faa_buf[i].gi); } - + /* * Clean up memory for the next ieration. */ seqalign = SeqAlignSetFree (seqalign); bsp = BioseqFree (bsp); - - } // End existing entry not found. + + } // End existing entry not found. /* * Hash table entry found. Keep the old value. */ else { - gi_type_data* old_value = (gi_type_data*)ep->data; + gi_type_data *old_value = (gi_type_data *) ep->data; new_buf[i].gi = old_value->gi; - strncpy (new_buf[i].type, old_value->type, sizeof (new_buf[i].type)); - strncpy (new_buf[i].protein, old_value->protein, sizeof (new_buf[i].protein)); + strncpy (new_buf[i].type, old_value->type, + sizeof (new_buf[i].type)); + strncpy (new_buf[i].protein, old_value->protein, + sizeof (new_buf[i].protein)); } - + /* * Write the data out to the file. */ - if ( (i % 1000 == 0) && (i > 0) ) + if ((i % 1000 == 0) && (i > 0)) { status = H5TBappend_records (file_id, "gi_type_data", 1000, - gi_size, gi_offset, gi_sizes, - &new_buf[i-1000]); + gi_size, gi_offset, gi_sizes, + &new_buf[i - 1000]); if (status < 0) check_h5_error (status, __FILE__, __LINE__); - + status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); if (status < 0) check_h5_error (status, __FILE__, __LINE__); written = i; - printf ("Processed %i of %i records.\n", i, (int)faa_nrecords); + printf ("Processed %i of %i records.\n", i, (int) faa_nrecords); } - + } /* * Write out records from the last bin if it was less than 1000 * records in size. */ - if ((int)faa_nrecords < 1000) + if ((int) faa_nrecords < 1000) { status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords, - gi_size, gi_offset, gi_sizes, - new_buf); + gi_size, gi_offset, gi_sizes, new_buf); } else { - status = H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written, - gi_size, gi_offset, gi_sizes, - &new_buf[written]); + status = + H5TBappend_records (file_id, "gi_type_data", faa_nrecords - written, + gi_size, gi_offset, gi_sizes, &new_buf[written]); } if (status < 0) check_h5_error (status, __FILE__, __LINE__); - + status = H5Fflush (file_id, H5F_SCOPE_GLOBAL); if (status < 0) check_h5_error (status, __FILE__, __LINE__); - + free (faa_buf); free (old_buf); free (new_buf); @@ -339,6 +339,6 @@ assign_protein_type (hid_t file_id) options = BLASTOptionDelete (options); readdb_destruct (seqdb); - + return; } |