path:
root/
analysis/
year.R (
plain)
blob: 37310d5cc21b46f9e93eb12d05eeab3d4fe64f7e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# Explore the qualities of the year feature.
#
# Interactive exploration script: the bare expressions below rely on
# top-level auto-printing (interactive session or Rscript).

# library() stops immediately if the package is missing; require() would
# only return FALSE and let the script fail later at hdf5load().
library(hdf5)

# NOTE(review): influenza.aa.dat, influenza.faa and gi.type.data are not
# defined in this script; presumably hdf5load() creates them from this
# file -- confirm against the contents of influenza.h5.
hdf5load("/home/don/exp007/src/influenza.h5", tidy = TRUE)

# Join the two influenza tables on the GenBank accession number, then
# attach the GI type data on the shared "GI" column.
A <- merge(
  influenza.aa.dat, influenza.faa,
  by.x = "GenBank accession number",
  by.y = "GB"
)
B <- merge(A, gi.type.data, by = "GI")

# Compare the local copy with a query performed on the NCBI database.
# A quick check of the number of records returned and the first and
# last set of GB values in sorted order should not show any
# inconsistencies.
#
# which() is used for every row filter below: a plain logical index
# returns an all-NA row for each record whose condition evaluates to NA,
# which would inflate the record count being checked here.
#
# The result is deliberately not named `T`: that would mask the built-in
# alias for TRUE for the remainder of the session.
ha_1978 <- B[which(B$Year == 1978 & B$Type == "A" & B$Protein == "HA"), ]
nrow(ha_1978)
# [[ ]] matches the column name exactly (no partial matching).
U <- ha_1978[["GenBank accession number"]]
sort(U)

# All records for 1918. Based on code from
# http://wiki.r-project.org/rwiki/doku.php?id=tips:data-frames:select_observations
C <- B[which(B$Year == 1918), ]
summary(C)

# Countries represented in the 1918 dataset.
C$Country
C[which(C$Protein == "HA"), ]

# All records with a year value.
# NOTE(review): a Year of 0 appears to mark "no year recorded" -- confirm
# against the data source. Records whose Year is NA are dropped as well.
E <- A[which(A$Year != 0), ]
hist(E$Year)
|