-rw-r--r-- | configure.ac | 20 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/aggregator.c | 14 | ||||
-rw-r--r-- | src/assign_protein_type.c | 72 | ||||
-rw-r--r-- | src/assign_protein_type.h | 6 | ||||
-rw-r--r-- | src/check_error.c | 14 | ||||
-rw-r--r-- | src/check_error.h | 11 | ||||
-rw-r--r-- | src/check_h5_error.c | 12 | ||||
-rw-r--r-- | src/check_h5_error.h | 12 | ||||
-rw-r--r-- | src/check_ncbi_error.c | 13 | ||||
-rw-r--r-- | src/check_ncbi_error.h | 13 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 31 |
12 files changed, 211 insertions, 13 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c index 2ba59a7..643ea3f 100644 --- a/src/assign_protein_type.c +++ b/src/assign_protein_type.c @@ -1,8 +1,80 @@ #include "assign_protein_type.h" +#include "check_ncbi_error.h" +#include <ncbi.h> +#include <readdb.h> +#include <blast.h> +#include <salpacc.h> +#include <stdbool.h> + +/* + * BLAST database containing all of the influenza protein sequences. + */ +#define SEQDB "/home/don/exp004/influenzadb/influenzadb" + +/* + * BLAST reference database of prototypical protein types. + */ +#define REFDB "/home/don/exp004/influenzadb/proteinnames" + +#define BUFFER_LEN 50 void assign_protein_type (hid_t file_id) { + /* + * Iterate through the records for which no protein type has been + * assigned. Create a BioSeq Pointer to the data and then use this + * as input to the BLAST run against the reference database of + * prototypical sequence records. + */ + + /* + * Make BLAST messages reportable. + */ + ErrSetMessageLevel (SEV_WARNING); + + /* + * Open the BLAST sequence database. + */ + ReadDBFILEPtr seqdb = readdb_new (SEQDB, true); + + /* + * Get default BLAST options. + */ + BLAST_OptionsBlkPtr options = BLASTOptionNew ("blastp", true); + ValNodePtr error_returns = NULL; + + /* + * Read the sequence from the database by GI. + */ + Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL); + BioseqPtr bsp = readdb_get_bioseq(seqdb, sequence_number); + + SeqAlignPtr seqalign = BioseqBlastEngine (bsp, + "blastp", + REFDB, + options, + NULL, + &error_returns, + NULL); + + if (error_returns != NULL) + check_ncbi_error (error_returns, __FILE__, __LINE__); + + if (seqalign != NULL) + { + Char target_id_buf[BUFFER_LEN + 1]; + SeqIdPtr target_id = SeqAlignId (seqalign, 1); + SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); + printf ("%s\n", + target_id_buf); + } + + // Clean up memory for the next ieration. + seqalign = SeqAlignSetFree (seqalign); + bsp = BioseqFree (bsp); + + options = BLASTOptionDelete (options); return; } |