-rw-r--r-- | configure.ac | 62 | ||||
-rw-r--r-- | doc/build-cobalt.txt | 24 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/assign/assign_blast_scores.c | 27 | ||||
-rw-r--r-- | src/assign/assign_blast_scores.h | 12 | ||||
-rw-r--r-- | src/updator.c | 20 |
6 files changed, 127 insertions, 24 deletions
diff --git a/configure.ac b/configure.ac index 59c5f36..f1ecdf9 100644 --- a/configure.ac +++ b/configure.ac | |||
@@ -7,20 +7,16 @@ AC_CONFIG_FILES([ | |||
7 | src/Makefile | 7 | src/Makefile |
8 | ]) | 8 | ]) |
9 | 9 | ||
10 | ################ | 10 | # Headers Checks |
11 | # MODULE: HDF5 # | ||
12 | ################ | ||
13 | 11 | ||
14 | AC_SEARCH_LIBS([H5Fcreate],[hdf5],[], | 12 | AC_CHECK_HEADERS([hdf5.h],[], |
15 | [AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)], | 13 | [AC_MSG_ERROR("The HDF5 headers are needed to build the system.")]) |
16 | [-lirc -lmpi -lz -lsz]) | ||
17 | 14 | ||
18 | AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[], | 15 | AC_CHECK_HEADERS([petscconf.h],[], |
19 | [AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)], | 16 | [AC_MSG_ERROR("The PETSc headers are needed to build the system.")]) |
20 | [-lirc -lhdf5 -lmpi -lz -lsz]) | ||
21 | 17 | ||
22 | AC_CHECK_HEADERS([hdf5.h],[], | 18 | AC_CHECK_HEADERS([petscmat.h],[], |
23 | [AC_MSG_ERROR(The HDF5 headers are needed to build the system.)]) | 19 | [AC_MSG_ERROR("The PETSc headers are needed to build the system.")]) |
24 | 20 | ||
25 | ######################## | 21 | ######################## |
26 | # MODULE: NCBI Toolkit # | 22 | # MODULE: NCBI Toolkit # |
@@ -28,18 +24,52 @@ AC_CHECK_HEADERS([hdf5.h],[], | |||
28 | 24 | ||
29 | # Check for the NCBI ToolBox libraries. | 25 | # Check for the NCBI ToolBox libraries. |
30 | AC_SEARCH_LIBS([log10],[m],[], | 26 | AC_SEARCH_LIBS([log10],[m],[], |
31 | [AC_MSG_ERROR(The C Math Library is needed to build the system.)]) | 27 | [AC_MSG_ERROR("The C Math Library is needed to build the system.")]) |
32 | 28 | ||
33 | AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[], | 29 | AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[], |
34 | [AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) | 30 | [AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) |
35 | 31 | ||
36 | AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[], | 32 | AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[], |
37 | [AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) | 33 | [AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) |
38 | 34 | ||
39 | AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[], | 35 | AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[], |
40 | [AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) | 36 | [AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) |
41 | 37 | ||
42 | AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[], | 38 | AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[], |
43 | [AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) | 39 | [AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) |
40 | |||
41 | ################ | ||
42 | # MODULE: HDF5 # | ||
43 | ################ | ||
44 | |||
45 | AC_SEARCH_LIBS([__intel_rtc_uninit_use],[irc]) | ||
46 | AC_SEARCH_LIBS([MPI_File_open],[mpi]) | ||
47 | AC_SEARCH_LIBS([compress2],[z]) | ||
48 | AC_SEARCH_LIBS([SZ_BufftoBuffCompress],[sz]) | ||
49 | |||
50 | AC_SEARCH_LIBS([H5Fcreate],[hdf5],[], | ||
51 | [AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")]) | ||
52 | |||
53 | AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[], | ||
54 | [AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")]) | ||
55 | |||
56 | ################# | ||
57 | # MODULE: PETSc # | ||
58 | ################# | ||
59 | |||
60 | # The AC_SEARCH_LIBS other-libraries list is not particularly useful. | ||
61 | # Autoconf determines if the other-libraries are necessary; however, in | ||
62 | # the cases where they are indeed necessary they are not actually | ||
63 | # added to the library list. | ||
64 | |||
65 | # These are the dependencies. Random functions are selected from | ||
66 | # these libraries from the list of otherwise undefined references at | ||
67 | # link time. | ||
68 | AC_SEARCH_LIBS([PetscInitialize],[petsc]) | ||
69 | AC_SEARCH_LIBS([VecNorm],[petscvec]) | ||
70 | |||
71 | # MatCreateSeqAIJ is actually used in the code for this project. | ||
72 | AC_SEARCH_LIBS([MatCreateSeqAIJ],[petscmat],[], | ||
73 | [AC_MSG_ERROR("The Portable Extensible Tookit for Scientific Computation PETSc is needed to build the system.")]) | ||
44 | 74 | ||
45 | AC_OUTPUT | 75 | AC_OUTPUT |
diff --git a/doc/build-cobalt.txt b/doc/build-cobalt.txt index 91b356b..76612a4 100644 --- a/doc/build-cobalt.txt +++ b/doc/build-cobalt.txt | |||
@@ -19,6 +19,24 @@ Building on NCSA Cobalt. | |||
19 | +phdf5-1.8.4 | 19 | +phdf5-1.8.4 |
20 | 20 | ||
21 | -- Configure | 21 | -- Configure |
22 | export CPPFLAGS="-I$HDF5_HOME/include -I$NCBI_DIR/include" | 22 | export PETSC_DIR=/u/ac/dpellegr/apps/Installers/petsc-3.0.0-p10 |
23 | export LDFLAGS="-L/usr/apps/hdf/szip/lib -L$HDF5_HOME/lib -L$NCBI_DIR/lib -L/usr/local/intel/10.1.017/lib" | 23 | |
24 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/apps/hdf/szip/lib | 24 | export CPPFLAGS="\ |
25 | -I$HDF5_HOME/include \ | ||
26 | -I$NCBI_DIR/include \ | ||
27 | -I$PETSC_DIR/include \ | ||
28 | -I$PETSC_DIR/linux-gnu-c-debug/include" | ||
29 | |||
30 | export LDFLAGS="\ | ||
31 | -L$HDF5_HOME/lib \ | ||
32 | -L/usr/apps/hdf/szip/lib \ | ||
33 | -L/usr/local/intel/10.1.017/lib \ | ||
34 | -L$NCBI_DIR/lib \ | ||
35 | -L$PETSC_DIR/linux-gnu-c-debug/lib" | ||
36 | |||
37 | export LD_LIBRARY_PATH=\ | ||
38 | $LD_LIBRARY_PATH:\ | ||
39 | /usr/apps/hdf/szip/lib:\ | ||
40 | $PETSC_DIR/linux-gnu-c-debug/lib | ||
41 | |||
42 | |||
diff --git a/src/Makefile.am b/src/Makefile.am index 752c0fe..407d7cd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am | |||
@@ -10,10 +10,9 @@ aggregator_SOURCES = \ | |||
10 | model/blast_scores_data_init.c \ | 10 | model/blast_scores_data_init.c \ |
11 | model/sequence_data_init.c | 11 | model/sequence_data_init.c |
12 | 12 | ||
13 | aggregator_LDADD = -lirc -lmpi -lsz -lz | ||
14 | |||
15 | updator_SOURCES = \ | 13 | updator_SOURCES = \ |
16 | updator.c \ | 14 | updator.c \ |
15 | assign/assign_blast_scores.c \ | ||
17 | assign/assign_protein_type.c \ | 16 | assign/assign_protein_type.c \ |
18 | error/check_error.c \ | 17 | error/check_error.c \ |
19 | error/check_h5_error.c \ | 18 | error/check_h5_error.c \ |
@@ -21,9 +20,8 @@ updator_SOURCES = \ | |||
21 | model/gi_type_data_init.c \ | 20 | model/gi_type_data_init.c \ |
22 | model/sequence_data_init.c | 21 | model/sequence_data_init.c |
23 | 22 | ||
24 | updator_LDADD = -lirc -lmpi -lsz -lz | ||
25 | |||
26 | noinst_HEADERS = \ | 23 | noinst_HEADERS = \ |
24 | assign/assign_blast_scores.h \ | ||
27 | assign/assign_protein_type.h \ | 25 | assign/assign_protein_type.h \ |
28 | error/check_error.h \ | 26 | error/check_error.h \ |
29 | error/check_h5_error.h \ | 27 | error/check_h5_error.h \ |
diff --git a/src/assign/assign_blast_scores.c b/src/assign/assign_blast_scores.c new file mode 100644 index 0000000..1cd491a --- a/dev/null +++ b/src/assign/assign_blast_scores.c | |||
@@ -0,0 +1,27 @@ | |||
1 | #include "assign_blast_scores.h" | ||
2 | #include <petscmat.h> | ||
3 | #include <stdio.h> | ||
4 | |||
5 | void | ||
6 | assign_blast_scores (hid_t file_id) | ||
7 | { | ||
8 | /* | ||
9 | * Create a sparse matrix for the pairwise protein BLAST scores. | ||
10 | * | ||
11 | * 138,769 proteins x 138,769 proteins = 19,256,835,361 pairs. At 8 | ||
12 | * bytes (double) per pair this will require 154.1 GB of memory to | ||
13 | * hold the full structure. This is available on the compute nodes | ||
14 | * of Cobalt; however, use of a sparse matrix should reduce this size | ||
15 | * dramatically. | ||
16 | */ | ||
17 | |||
18 | // http://www.netlib.org/blas/blast-forum/blas_sparse_proto.h | ||
19 | // blas_sparse_matrix M = BLAS_duscr_begin (138769, 138769); | ||
20 | |||
21 | Mat M; | ||
22 | MatCreateSeqAIJ (PETSC_COMM_SELF, 138769, 138769, 50, NULL, &M); | ||
23 | |||
24 | MatDestroy (M); | ||
25 | |||
26 | return; | ||
27 | } | ||
diff --git a/src/assign/assign_blast_scores.h b/src/assign/assign_blast_scores.h new file mode 100644 index 0000000..2c52192 --- a/dev/null +++ b/src/assign/assign_blast_scores.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef ASSIGN_BLAST_SCORES_H | ||
2 | #define ASSIGN_BLAST_SCORES_H | ||
3 | |||
4 | #include <hdf5.h> | ||
5 | |||
6 | /* | ||
7 | * Pairwise BLAST of each protein in the NCBI Influenza Sequence | ||
8 | * Database against the full database. | ||
9 | */ | ||
10 | void assign_blast_scores (hid_t file_id); | ||
11 | |||
12 | #endif // ASSIGN_BLAST_SCORES_H | ||
diff --git a/src/updator.c b/src/updator.c index f93e205..0bade65 100644 --- a/src/updator.c +++ b/src/updator.c | |||
@@ -3,16 +3,25 @@ | |||
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include "assign/assign_protein_type.h" | 5 | #include "assign/assign_protein_type.h" |
6 | #include "assign/assign_blast_scores.h" | ||
6 | #include "error/check_h5_error.h" | 7 | #include "error/check_h5_error.h" |
8 | #include <petsc.h> | ||
7 | #include <stdio.h> | 9 | #include <stdio.h> |
8 | #include <signal.h> | 10 | #include <signal.h> |
9 | 11 | ||
10 | #define FILE "influenza.h5" | 12 | #define FILE "influenza.h5" |
11 | 13 | ||
12 | int | 14 | int |
13 | main () | 15 | main (int argc, char **argv) |
14 | { | 16 | { |
15 | /* | 17 | /* |
18 | * Initialize the PETSc database and MPI. | ||
19 | * | ||
20 | * http://www.mcs.anl.gov/petsc/petsc-2/snapshots/petsc-dev/docs/manualpages/Sys/PetscInitialize.html#PetscInitialize | ||
21 | */ | ||
22 | PetscInitialize (&argc, &argv, 0, 0); | ||
23 | |||
24 | /* | ||
16 | * Open the HDF5 file. | 25 | * Open the HDF5 file. |
17 | */ | 26 | */ |
18 | hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT); | 27 | hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT); |
@@ -22,7 +31,14 @@ main () | |||
22 | /* | 31 | /* |
23 | * Assign protein type values to the sequence records. | 32 | * Assign protein type values to the sequence records. |
24 | */ | 33 | */ |
34 | /* | ||
25 | assign_protein_type (file_id); | 35 | assign_protein_type (file_id); |
36 | */ | ||
37 | |||
38 | /* | ||
39 | * Assign pairwise BLAST scores. | ||
40 | */ | ||
41 | assign_blast_scores (file_id); | ||
26 | 42 | ||
27 | /* | 43 | /* |
28 | * Close the HDF5 file. | 44 | * Close the HDF5 file. |
@@ -31,5 +47,7 @@ main () | |||
31 | if (status < 0) | 47 | if (status < 0) |
32 | check_h5_error (__FILE__, __LINE__); | 48 | check_h5_error (__FILE__, __LINE__); |
33 | 49 | ||
50 | PetscFinalize (); | ||
51 | |||
34 | return 0; | 52 | return 0; |
35 | } | 53 | } |