-rw-r--r-- | configure.ac | 20 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/aggregator.c | 14 | ||||
-rw-r--r-- | src/assign_protein_type.c | 72 | ||||
-rw-r--r-- | src/assign_protein_type.h | 6 | ||||
-rw-r--r-- | src/check_error.c | 14 | ||||
-rw-r--r-- | src/check_error.h | 11 | ||||
-rw-r--r-- | src/check_h5_error.c | 12 | ||||
-rw-r--r-- | src/check_h5_error.h | 12 | ||||
-rw-r--r-- | src/check_ncbi_error.c | 13 | ||||
-rw-r--r-- | src/check_ncbi_error.h | 13 | ||||
-rw-r--r-- | src/load_influenza_aa_dat.c | 31 |
12 files changed, 211 insertions, 13 deletions
diff --git a/src/assign_protein_type.c b/src/assign_protein_type.c index 2ba59a7..643ea3f 100644 --- a/src/assign_protein_type.c +++ b/src/assign_protein_type.c | |||
@@ -1,8 +1,80 @@ | |||
1 | #include "assign_protein_type.h" | 1 | #include "assign_protein_type.h" |
2 | #include "check_ncbi_error.h" | ||
3 | #include <ncbi.h> | ||
4 | #include <readdb.h> | ||
5 | #include <blast.h> | ||
6 | #include <salpacc.h> | ||
7 | #include <stdbool.h> | ||
8 | |||
9 | /* | ||
10 | * BLAST database containing all of the influenza protein sequences. | ||
11 | */ | ||
12 | #define SEQDB "/home/don/exp004/influenzadb/influenzadb" | ||
13 | |||
14 | /* | ||
15 | * BLAST reference database of prototypical protein types. | ||
16 | */ | ||
17 | #define REFDB "/home/don/exp004/influenzadb/proteinnames" | ||
18 | |||
19 | #define BUFFER_LEN 50 | ||
2 | 20 | ||
3 | void | 21 | void |
4 | assign_protein_type (hid_t file_id) | 22 | assign_protein_type (hid_t file_id) |
5 | { | 23 | { |
24 | /* | ||
25 | * Iterate through the records for which no protein type has been | ||
26 | * assigned. Create a BioSeq Pointer to the data and then use this | ||
27 | * as input to the BLAST run against the reference database of | ||
28 | * prototypical sequence records. | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * Make BLAST messages reportable. | ||
33 | */ | ||
34 | ErrSetMessageLevel (SEV_WARNING); | ||
35 | |||
36 | /* | ||
37 | * Open the BLAST sequence database. | ||
38 | */ | ||
39 | ReadDBFILEPtr seqdb = readdb_new (SEQDB, true); | ||
40 | |||
41 | /* | ||
42 | * Get default BLAST options. | ||
43 | */ | ||
44 | BLAST_OptionsBlkPtr options = BLASTOptionNew ("blastp", true); | ||
45 | ValNodePtr error_returns = NULL; | ||
46 | |||
47 | /* | ||
48 | * Read the sequence from the database by GI. | ||
49 | */ | ||
50 | Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL); | ||
51 | BioseqPtr bsp = readdb_get_bioseq(seqdb, sequence_number); | ||
52 | |||
53 | SeqAlignPtr seqalign = BioseqBlastEngine (bsp, | ||
54 | "blastp", | ||
55 | REFDB, | ||
56 | options, | ||
57 | NULL, | ||
58 | &error_returns, | ||
59 | NULL); | ||
60 | |||
61 | if (error_returns != NULL) | ||
62 | check_ncbi_error (error_returns, __FILE__, __LINE__); | ||
63 | |||
64 | if (seqalign != NULL) | ||
65 | { | ||
66 | Char target_id_buf[BUFFER_LEN + 1]; | ||
67 | SeqIdPtr target_id = SeqAlignId (seqalign, 1); | ||
68 | SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN); | ||
69 | printf ("%s\n", | ||
70 | target_id_buf); | ||
71 | } | ||
72 | |||
73 | // Clean up memory for the next ieration. | ||
74 | seqalign = SeqAlignSetFree (seqalign); | ||
75 | bsp = BioseqFree (bsp); | ||
76 | |||
77 | options = BLASTOptionDelete (options); | ||
6 | 78 | ||
7 | return; | 79 | return; |
8 | } | 80 | } |