1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
#include "assign_protein_type.h"
#include "check_ncbi_error.h"
#include <ncbi.h>
#include <readdb.h>
#include <blast.h>
#include <salpacc.h>
#include <stdbool.h>
/*
* BLAST database containing all of the influenza protein sequences.
*/
#define SEQDB "/home/don/exp004/influenzadb/influenzadb"
/*
* BLAST reference database of prototypical protein types.
*/
#define REFDB "/home/don/exp004/influenzadb/proteinnames"
#define BUFFER_LEN 50
void
assign_protein_type (hid_t file_id)
{
/*
* Iterate through the records for which no protein type has been
* assigned. Create a BioSeq Pointer to the data and then use this
* as input to the BLAST run against the reference database of
* prototypical sequence records.
*/
/*
* Make BLAST messages reportable.
*/
ErrSetMessageLevel (SEV_WARNING);
/*
* Open the BLAST sequence database.
*/
ReadDBFILEPtr seqdb = readdb_new (SEQDB, true);
/*
* Get default BLAST options.
*/
BLAST_OptionsBlkPtr options = BLASTOptionNew ("blastp", true);
ValNodePtr error_returns = NULL;
/*
* Read the sequence from the database by GI.
*/
Int4 sequence_number = readdb_gi2seq (seqdb, 453644, NULL);
BioseqPtr bsp = readdb_get_bioseq(seqdb, sequence_number);
SeqAlignPtr seqalign = BioseqBlastEngine (bsp,
"blastp",
REFDB,
options,
NULL,
&error_returns,
NULL);
if (error_returns != NULL)
check_ncbi_error (error_returns, __FILE__, __LINE__);
if (seqalign != NULL)
{
Char target_id_buf[BUFFER_LEN + 1];
SeqIdPtr target_id = SeqAlignId (seqalign, 1);
SeqIdWrite (target_id, target_id_buf, PRINTID_FASTA_SHORT, BUFFER_LEN);
printf ("%s\n",
target_id_buf);
}
// Clean up memory for the next ieration.
seqalign = SeqAlignSetFree (seqalign);
bsp = BioseqFree (bsp);
options = BLASTOptionDelete (options);
return;
}
|