-rw-r--r-- | configure.ac | 62 | ||||
-rw-r--r-- | doc/build-cobalt.txt | 24 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/assign/assign_blast_scores.c | 27 | ||||
-rw-r--r-- | src/assign/assign_blast_scores.h | 12 | ||||
-rw-r--r-- | src/updator.c | 20 |
6 files changed, 127 insertions, 24 deletions
diff --git a/configure.ac b/configure.ac index 59c5f36..f1ecdf9 100644 --- a/configure.ac +++ b/configure.ac @@ -7,20 +7,16 @@ AC_CONFIG_FILES([ src/Makefile ]) -################ -# MODULE: HDF5 # -################ +# Headers Checks -AC_SEARCH_LIBS([H5Fcreate],[hdf5],[], -[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)], -[-lirc -lmpi -lz -lsz]) +AC_CHECK_HEADERS([hdf5.h],[], +[AC_MSG_ERROR("The HDF5 headers are needed to build the system.")]) -AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[], -[AC_MSG_ERROR(The HDF5 libraries are needed to build the system.)], -[-lirc -lhdf5 -lmpi -lz -lsz]) +AC_CHECK_HEADERS([petscconf.h],[], +[AC_MSG_ERROR("The PETSc headers are needed to build the system.")]) -AC_CHECK_HEADERS([hdf5.h],[], -[AC_MSG_ERROR(The HDF5 headers are needed to build the system.)]) +AC_CHECK_HEADERS([petscmat.h],[], +[AC_MSG_ERROR("The PETSc headers are needed to build the system.")]) ######################## # MODULE: NCBI Toolkit # @@ -28,18 +24,52 @@ AC_CHECK_HEADERS([hdf5.h],[], # Check for the NCBI ToolBox libraries. AC_SEARCH_LIBS([log10],[m],[], -[AC_MSG_ERROR(The C Math Library is needed to build the system.)]) +[AC_MSG_ERROR("The C Math Library is needed to build the system.")]) AC_SEARCH_LIBS([NlmThreadsAvailable],[ncbi],[], -[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) +[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) AC_SEARCH_LIBS([SeqAlignNew],[ncbiobj],[], -[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. 
Debian users can add the package libncbi6-dev to fulfill this dependency.)]) +[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) AC_SEARCH_LIBS([Blast_RedoOneMatch],[blastcompadj],[], -[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) +[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) AC_SEARCH_LIBS([BioseqBlastEngine],[ncbitool],[], -[AC_MSG_ERROR(The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.)]) +[AC_MSG_ERROR("The NCBI ToolBox is needed to build the system. Information on this API can be found on-line at http://www.ncbi.nlm.nih.gov/IEB/ToolBox/index.cgi. Debian users can add the package libncbi6-dev to fulfill this dependency.")]) + +################ +# MODULE: HDF5 # +################ + +AC_SEARCH_LIBS([__intel_rtc_uninit_use],[irc]) +AC_SEARCH_LIBS([MPI_File_open],[mpi]) +AC_SEARCH_LIBS([compress2],[z]) +AC_SEARCH_LIBS([SZ_BufftoBuffCompress],[sz]) + +AC_SEARCH_LIBS([H5Fcreate],[hdf5],[], +[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")]) + +AC_SEARCH_LIBS([H5TBmake_table],[hdf5_hl],[], +[AC_MSG_ERROR("The HDF5 libraries are needed to build the system.")]) + +################# +# MODULE: PETSc # +################# + +# The AC_SEARCH_LIBS other-libraries list is not particularly useful. 
+# Autoconf determines if the other-libraries are necessary however in +# the cases where they are indeed necessary they are not actually +# added to the library list. + +# These are the dependencies. Random functions are selected from +# these libraries from the list of otherwise undefined references at +# link time. +AC_SEARCH_LIBS([PetscInitialize],[petsc]) +AC_SEARCH_LIBS([VecNorm],[petscvec]) + +# MatCreateSeqAIJ is actually used in the code for this project. +AC_SEARCH_LIBS([MatCreateSeqAIJ],[petscmat],[], +[AC_MSG_ERROR("The Portable Extensible Tookit for Scientific Computation PETSc is needed to build the system.")]) AC_OUTPUT diff --git a/doc/build-cobalt.txt b/doc/build-cobalt.txt index 91b356b..76612a4 100644 --- a/doc/build-cobalt.txt +++ b/doc/build-cobalt.txt @@ -19,6 +19,24 @@ Building on NCSA Cobalt. +phdf5-1.8.4 -- Configure -export CPPFLAGS="-I$HDF5_HOME/include -I$NCBI_DIR/include" -export LDFLAGS="-L/usr/apps/hdf/szip/lib -L$HDF5_HOME/lib -L$NCBI_DIR/lib -L/usr/local/intel/10.1.017/lib" -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/apps/hdf/szip/lib +export PETSC_DIR=/u/ac/dpellegr/apps/Installers/petsc-3.0.0-p10 + +export CPPFLAGS="\ +-I$HDF5_HOME/include \ +-I$NCBI_DIR/include \ +-I$PETSC_DIR/include \ +-I$PETSC_DIR/linux-gnu-c-debug/include" + +export LDFLAGS="\ +-L$HDF5_HOME/lib \ +-L/usr/apps/hdf/szip/lib \ +-L/usr/local/intel/10.1.017/lib \ +-L$NCBI_DIR/lib \ +-L$PETSC_DIR/linux-gnu-c-debug/lib" + +export LD_LIBRARY_PATH=\ +$LD_LIBRARY_PATH:\ +/usr/apps/hdf/szip/lib:\ +$PETSC_DIR/linux-gnu-c-debug/lib + + diff --git a/src/Makefile.am b/src/Makefile.am index 752c0fe..407d7cd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -10,10 +10,9 @@ aggregator_SOURCES = \ model/blast_scores_data_init.c \ model/sequence_data_init.c -aggregator_LDADD = -lirc -lmpi -lsz -lz - updator_SOURCES = \ updator.c \ + assign/assign_blast_scores.c \ assign/assign_protein_type.c \ error/check_error.c \ error/check_h5_error.c \ @@ -21,9 +20,8 @@ updator_SOURCES 
= \ model/gi_type_data_init.c \ model/sequence_data_init.c -updator_LDADD = -lirc -lmpi -lsz -lz - noinst_HEADERS = \ + assign/assign_blast_scores.h \ assign/assign_protein_type.h \ error/check_error.h \ error/check_h5_error.h \ diff --git a/src/assign/assign_blast_scores.c b/src/assign/assign_blast_scores.c new file mode 100644 index 0000000..1cd491a --- a/dev/null +++ b/src/assign/assign_blast_scores.c @@ -0,0 +1,27 @@ +#include "assign_blast_scores.h" +#include <petscmat.h> +#include <stdio.h> + +void +assign_blast_scores (hid_t file_id) +{ + /* + * Create a sparse matrix for the pairwise protein BLAST scores. + * + * 138,769 proteins x 138,769 proteins = 19,256,835,361 pairs. At 8 + * bytes (double) per pair this will require 154.1 GB of memory to + * hold the full structure. This is available on the compute nodes + * of Cobalt however use of a sparse matrix should reduce this size + * dramatically. + */ + + // http://www.netlib.org/blas/blast-forum/blas_sparse_proto.h + // blas_sparse_matrix M = BLAS_duscr_begin (138769, 138769); + + Mat M; + MatCreateSeqAIJ (PETSC_COMM_SELF, 138769, 138769, 50, NULL, &M); + + MatDestroy (M); + + return; +} diff --git a/src/assign/assign_blast_scores.h b/src/assign/assign_blast_scores.h new file mode 100644 index 0000000..2c52192 --- a/dev/null +++ b/src/assign/assign_blast_scores.h @@ -0,0 +1,12 @@ +#ifndef ASSIGN_BLAST_SCORES_H +#define ASSIGN_BLAST_SCORES_H + +#include <hdf5.h> + +/* + * Pairwise BLAST of each protein in the NCBI Influenza Sequence + * Database against the full database. 
+ */ +void assign_blast_scores (hid_t file_id); + +#endif // ASSIGN_BLAST_SCORES_H diff --git a/src/updator.c b/src/updator.c index f93e205..0bade65 100644 --- a/src/updator.c +++ b/src/updator.c @@ -3,16 +3,25 @@ */ #include "assign/assign_protein_type.h" +#include "assign/assign_blast_scores.h" #include "error/check_h5_error.h" +#include <petsc.h> #include <stdio.h> #include <signal.h> #define FILE "influenza.h5" int -main () +main (int argc, char **argv) { /* + * Initialize the PETSc database and MPI. + * + * http://www.mcs.anl.gov/petsc/petsc-2/snapshots/petsc-dev/docs/manualpages/Sys/PetscInitialize.html#PetscInitialize + */ + PetscInitialize (&argc, &argv, 0, 0); + + /* * Open the HDF5 file. */ hid_t file_id = H5Fopen (FILE, H5F_ACC_RDWR, H5P_DEFAULT); @@ -22,7 +31,14 @@ main () /* * Assign protein type values to the sequence records. */ + /* assign_protein_type (file_id); + */ + + /* + * Assign pairwise BLAST scores. + */ + assign_blast_scores (file_id); /* * Close the HDF5 file. @@ -31,5 +47,7 @@ main () if (status < 0) check_h5_error (__FILE__, __LINE__); + PetscFinalize (); + return 0; } |