たぶんWindowsだけだと思うんですけど、最近パッケージをインストールしようとするとこんな警告出ませんか?
Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/src/contrib: cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/src/contrib/PACKAGES'
軽くぐぐってみたところ、この警告を見ている人は多いようです。
ちょっと気になって調べると、これはCRANではなく「CRANextra」というレポジトリ(パッケージのインデックスだけ?)のようです。CRANextraは、R 3.1.0のNEWSに、
The CRANextra repository is no longer a default repository on Windows: all the binary versions of packages from CRAN are now on CRAN, although CRANextra contains packages from Omegahat and elsewhere used by CRAN packages.
(https://cran.r-project.org/doc/manuals/r-release/NEWS.html)
とかって書かれていました。そういえばあまり知られていないかもしれませんが、CRANのパッケージはCRAN以外のレポジトリのパッケージにも依存することが許されています。
具体的にどういうパッケージがDependsやImportsに指定されているのか気になったのでちょっと調べてみたときのメモ。
パッケージ一覧を取得
まずはavailable.packages()でパッケージ一覧を取ってきます。
# Download the package index from every configured repository.
cran_pkg <- utils::available.packages()
#> Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/src/contrib:
#> cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/src/contrib/PACKAGES'
中身を覗くとdata.frameっぽいですが、これはmatrixです。
# Show the first six rows of the package index.
utils::head(cran_pkg, n = 6L)
#> Package Version Priority
#> A3 "A3" "1.0.0" NA
#> abbyyR "abbyyR" "0.5.1" NA
#> abc "abc" "2.1" NA
#> abc.data "abc.data" "1.0" NA
#> ABC.RAP "ABC.RAP" "0.9.0" NA
#> ABCanalysis "ABCanalysis" "1.2.1" NA
#> Depends
#> A3 "R (>= 2.15.0), xtable, pbapply"
#> abbyyR "R (>= 3.2.0)"
#> abc "R (>= 2.10), abc.data, nnet, quantreg, MASS, locfit"
#> abc.data "R (>= 2.10)"
#> ABC.RAP "R (>= 3.1.0)"
#> ABCanalysis "R (>= 2.10)"
#> Imports LinkingTo
#> A3 NA NA
#> abbyyR "httr, XML, curl, readr, plyr, progress" NA
#> abc NA NA
#> abc.data NA NA
#> ABC.RAP "graphics, stats, utils" NA
#> ABCanalysis "plotrix" NA
#> Suggests Enhances
#> A3 "randomForest, e1071" NA
#> abbyyR "testthat, rmarkdown, knitr (>= 1.11)" NA
#> abc NA NA
#> abc.data NA NA
#> ABC.RAP "knitr, rmarkdown" NA
#> ABCanalysis NA NA
#> License License_is_FOSS License_restricts_use
#> A3 "GPL (>= 2)" NA NA
#> abbyyR "MIT + file LICENSE" NA NA
#> abc "GPL (>= 3)" NA NA
#> abc.data "GPL (>= 3)" NA NA
#> ABC.RAP "GPL-3" NA NA
#> ABCanalysis "GPL-3" NA NA
#> OS_type Archs MD5sum NeedsCompilation File
#> A3 NA NA NA "no" NA
#> abbyyR NA NA NA "no" NA
#> abc NA NA NA "no" NA
#> abc.data NA NA NA "no" NA
#> ABC.RAP NA NA NA "no" NA
#> ABCanalysis NA NA NA "no" NA
#> Repository
#> A3 "https://cran.rstudio.com/src/contrib"
#> abbyyR "https://cran.rstudio.com/src/contrib"
#> abc "https://cran.rstudio.com/src/contrib"
#> abc.data "https://cran.rstudio.com/src/contrib"
#> ABC.RAP "https://cran.rstudio.com/src/contrib"
#> ABCanalysis "https://cran.rstudio.com/src/contrib"

# List the classes the index object belongs to.
methods::is(cran_pkg)
#> [1] "matrix" "array" "structure" "vector"
Dependsとかの列がどうなっているか見ると、文字列になっています。
# List the classes of a single cell of the "Depends" column.
methods::is(cran_pkg[1L, "Depends"])
#> [1] "character" "vector" "data.frameRowLabels"
#> [4] "SuperClassMethod"
前処理
Dependsは「pkg1, pkg2 (>= 0.1), pkg3」のような文字列になっているので、ここからパッケージ名のunique()を取る関数をつくります。
library(magrittr)
library(stringr)

# Extract the unique package names mentioned in a dependency field.
#
# Input:  a character vector of comma-separated dependency specs such as
#         "pkg1, pkg2 (>= 0.1), pkg3"; NA entries are ignored.
# Output: a character vector of unique package names, in order of first
#         appearance, with no empty strings.
get_unique_pkg <- . %>%
  purrr::discard(is.na) %>%
  # Remove version constraints such as "(>= 0.3)". The negated character
  # class stops at the first ")" so each constraint is removed on its own
  # (a greedy ".*" would swallow every package listed between the first "("
  # and the last ")"). "[^)]" also matches newlines, so constraints that are
  # wrapped across lines are still handled.
  str_replace_all("\\([^)]*\\)", "") %>%
  # Split each field on commas.
  str_split(",") %>%
  # str_split() returns a list; flatten it into one character vector.
  purrr::flatten_chr() %>%
  # Names may be surrounded by spaces or newlines.
  str_trim() %>%
  # A trailing comma (with or without whitespace after it) leaves an empty
  # piece behind; drop those.
  purrr::keep(nzchar) %>%
  unique()
これをそれぞれのフィールドに適用します。
# Collect the unique package names referenced by each dependency field.
depends   <- cran_pkg[, "Depends"]   %>% get_unique_pkg()
imports   <- cran_pkg[, "Imports"]   %>% get_unique_pkg()
linkingto <- cran_pkg[, "LinkingTo"] %>% get_unique_pkg()
suggests  <- cran_pkg[, "Suggests"]  %>% get_unique_pkg()
enhances  <- cran_pkg[, "Enhances"]  %>% get_unique_pkg()
CRANの全パッケージ名
CRANのパッケージ名は、先ほどのmatrixからPackageという列を取り出します。
これに加えて、baseのパッケージ(インストール時からRに付属しているパッケージ)はCRAN上にはないので、別途もってくる必要があります。これはinstalled.packages(priority="base")でavailable.packages()と同じようなmatrixが得られるのでそれを使います。
# Names of every package listed in the downloaded index.
cran_pkg_names <- as.vector(cran_pkg[, "Package"])

# Names of the installed packages whose priority is "base".
base_pkg_names <- as.vector(
  utils::installed.packages(priority = "base")[, "Package"]
)

# Every name treated as "known"; "R" is added because dependency fields
# also list R itself (e.g. "R (>= 2.10)").
pkg_names <- c(base_pkg_names, cran_pkg_names, "R")
結果
Depends
名前でぐぐってみると、全部Bioconductorのパッケージみたいでした。
# Entries of `depends` that do not appear in `pkg_names`.
depends[!(depends %in% pkg_names)]
#> [1] "SAGx" "edgeR" "sva"
#> [4] "affyPLM" "beadarray" "geneplotter"
#> [7] "qvalue" "DNAcopy" "Biobase"
#> [10] "GEOquery" "limma" "multtest"
#> [13] "RedeR" "MLInterfaces" "pcaMethods"
#> [16] "graph" "Rgraphviz" "supraHex"
#> [19] "DESeq2" "GenomicRanges" "impute"
#> [22] "flowWorkspace" "flowCore" "breastCancerVDX"
#> [25] "EBImage" "rtracklayer" "Icens"
#> [28] "Biostrings" "RBGL" "affy"
#> [31] "seqLogo" "GSEABase" "genefilter"
#> [34] "biomaRt" "GSVA" "mzR"
#> [37] "xcms" "IRanges" "LBE"
#> [40] "qtbase" "fmcsR" "ChemmineR"
#> [43] "fabia" "iBBiG" "copynumber"
#> [46] "ShortRead" "zlibbioc" "Rsamtools"
#> [49] "survcomp" "DESeq" "snpStats"
Imports
# Entries of `imports` that do not appear in `pkg_names`.
imports[!(imports %in% pkg_names)]
#> [1] "limma" "DESeq"
#> [3] "graph" "genefilter"
#> [5] "Biobase" "multtest"
#> [7] "edgeR" "aroma.light"
#> [9] "EBImage" "RDAVIDWebService"
#> [11] "affy" "Rgraphviz"
#> [13] "fabia" "biomaRt"
#> [15] "Biostrings" "AnnotationDbi"
#> [17] "qvalue" "DESeq2"
#> [19] "sva" "DNAcopy"
#> [21] "marray" "GenomicRanges"
#> [23] "GenomeInfoDb" "S4Vectors"
#> [25] "IRanges" "xcms"
#> [27] "chopsticks" "RBGL"
#> [29] "SNPRelate" "BSgenome"
#> [31] "BSgenome.Hsapiens.UCSC.hg19" "affxparser"
#> [33] "oligo" "ALL"
#> [35] "GOstats" "iCheck"
#> [37] "GSEAlm" "globaltest"
#> [39] "siggenes" "gcrma"
#> [41] "BiocParallel" "Rsamtools"
#> [43] "GenomicAlignments" "gage"
#> [45] "KEGGREST" "snpStats"
#> [47] "BiocGenerics" "rtracklayer"
#> [49] "illuminaio" "preprocessCore"
#> [51] "BiocInstaller" "OmicCircos"
#> [53] "HiTC" "SummarizedExperiment"
#> [55] "GenomicFeatures" "a4Core"
#> [57] "Rdisop" "lfa"
#> [59] "org.Hs.eg.db" "KEGGgraph"
#> [61] "GOSemSim" "pcaMethods"
#> [63] "pdInfoBuilder" "convert"
#> [65] "GEOquery" "GEOmetadb"
#> [67] "annotate" "GO.db"
#> [69] "simpleaffy" "impute"
#> [71] "beadarray" "lumi"
#> [73] "ssize" "Category"
#> [75] "GlobalAncova" "phyloseq"
#> [77] "minet" "survcomp"
#> [79] "qtbase" "widgetTools"
#> [81] "XVector" "qpgraph"
#> [83] "MassSpecWavelet" "MAIT"
#> [85] "CMA" "CNTools"
#> [87] "topGO" "supraHex"
#> [89] "rPython"
LinkingTo
これはあんまり使われてないみたいです。
# Entries of `linkingto` that do not appear in `pkg_names`.
linkingto[!(linkingto %in% pkg_names)]
#> [1] "qtbase"
Suggests
Suggestsはインストールされないので?けっこう適当なパッケージが名を連ねています。例えば下の方にAnomalyDetectionというパッケージがありますが、これはanomalyDetection(先頭が小文字)の間違いです。BreakoutDetectionはリリースされていないパッケージ名です。こんなんで許されるのか…
# Entries of `suggests` that do not appear in `pkg_names`.
suggests[!(suggests %in% pkg_names)]
#> [1] "INLA"
#> [2] "Rgraphviz"
#> [3] "BiocStyle"
#> [4] "DNAcopy"
#> [5] "Biostrings"
#> [6] "Biobase"
#> [7] "BiocGenerics"
#> [8] "kebabs"
#> [9] "affxparser"
#> [10] "aroma.light"
#> [11] "graph"
#> [12] "cacheSweave"
#> [13] "weaver"
#> [14] "mzR"
#> [15] "ArrayExpress"
#> [16] "doMC"
#> [17] "beadarrayExampleData"
#> [18] "lumi"
#> [19] "GO.db"
#> [20] "IHW"
#> [21] "RQuantLib"
#> [22] "bigrf"
#> [23] "sangerseqR"
#> [24] "phyloseq"
#> [25] "GenomicRanges"
#> [26] "IRanges"
#> [27] "S4Vectors"
#> [28] "limma"
#> [29] "GLAD"
#> [30] "multtest"
#> [31] "gurobi"
#> [32] "annotate"
#> [33] "moe430a.db"
#> [34] "CGHcall"
#> [35] "CGHregions"
#> [36] "CNVtools"
#> [37] "CGHbase"
#> [38] "snpStats"
#> [39] "inSilicoDb"
#> [40] "genefilter"
#> [41] "GEOmetadb"
#> [42] "GSVA"
#> [43] "mogene10sttranscriptcluster.db"
#> [44] "qvalue"
#> [45] "Rhipe"
#> [46] "VariantAnnotation"
#> [47] "Rdonlp2"
#> [48] "RDCOMClient"
#> [49] "impute"
#> [50] "GOstats"
#> [51] "org.Hs.eg.db"
#> [52] "AnnotationDbi"
#> [53] "asreml"
#> [54] "rPython"
#> [55] "ComplexHeatmap"
#> [56] "hgu133plus2.db"
#> [57] "Category"
#> [58] "vsn"
#> [59] "GSEABase"
#> [60] "ggtree"
#> [61] "faahKO"
#> [62] "RCytoscape"
#> [63] "GEOquery"
#> [64] "golubEsets"
#> [65] "coxKM"
#> [66] "its"
#> [67] "fUnitRoots"
#> [68] "RSvgDevice"
#> [69] "ActuDistns"
#> [70] "Rsocp"
#> [71] "Rnlminb2"
#> [72] "genderdata"
#> [73] "RHugin"
#> [74] "KEGG.db"
#> [75] "reactome.db"
#> [76] "GlobalAncova"
#> [77] "globaltest"
#> [78] "DESeq"
#> [79] "BSgenome.Hsapiens.UCSC.hg18.masked"
#> [80] "BSgenome.Hsapiens.UCSC.hg19.masked"
#> [81] "preprocessCore"
#> [82] "Sxslt"
#> [83] "taxstats"
#> [84] "brassicaData"
#> [85] "BSgenome.Hsapiens.UCSC.hg19"
#> [86] "glmmADMB"
#> [87] "marray"
#> [88] "affy"
#> [89] "XMLRPC"
#> [90] "HTSFilter"
#> [91] "hurricaneexposuredata"
#> [92] "SWAT2R"
#> [93] "PROcess"
#> [94] "ie2miscdata"
#> [95] "rsprng"
#> [96] "MLP"
#> [97] "biomaRt"
#> [98] "a4Base"
#> [99] "rtracklayer"
#> [100] "GenomeInfoDb"
#> [101] "EBImage"
#> [102] "ASGSCA"
#> [103] "annaffy"
#> [104] "hgu133a.db"
#> [105] "hgu95av2.db"
#> [106] "doSMP"
#> [107] "RTisean"
#> [108] "grasp2db"
#> [109] "microcontax.data"
#> [110] "GeneSelector"
#> [111] "corpus.useR.2008.abstracts"
#> [112] "rhdf5"
#> [113] "org.Mm.eg.db"
#> [114] "xcms"
#> [115] "BiocInstaller"
#> [116] "graphite"
#> [117] "NFPdata"
#> [118] "synchronicity"
#> [119] "unix"
#> [120] "openNLPmodels.en"
#> [121] "org.At.tair.db"
#> [122] "hu6800.db"
#> [123] "dendextendRcpp"
#> [124] "seqLogo"
#> [125] "RankProd"
#> [126] "OCplus"
#> [127] "ggsubplot"
#> [128] "ReportingTools"
#> [129] "latticist"
#> [130] "edgeR"
#> [131] "polmineR.Rcpp"
#> [132] "europarl.en"
#> [133] "rcqp"
#> [134] "qtbase"
#> [135] "ALDEx2"
#> [136] "GOSemSim"
#> [137] "QCA3"
#> [138] "biom"
#> [139] "rcom"
#> [140] "rpvm"
#> [141] "KEGGgraph"
#> [142] "Heatplus"
#> [143] "rgurobi"
#> [144] "RBGL"
#> [145] "pkgDepTools"
#> [146] "ROpenOffice"
#> [147] "RcppMsgPack"
#> [148] "Rcompression"
#> [149] "Icens"
#> [150] "hyperdraw"
#> [151] "hypergraph"
#> [152] "pmg"
#> [153] "SVGAnnotation"
#> [154] "topGO"
#> [155] "RDRToolbox"
#> [156] "rrdf"
#> [157] "SSOAP"
#> [158] "ctc"
#> [159] "org.Sc.sgd.db"
#> [160] "BSgenome.Hsapiens.UCSC.hg38"
#> [161] "clipper"
#> [162] "fastshp"
#> [163] "sva"
#> [164] "DESeq2"
#> [165] "Rcampdf"
#> [166] "Rpoppler"
#> [167] "tm.lexicon.GeneralInquirer"
#> [168] "corpus.JSS.papers"
#> [169] "rmongodb"
#> [170] "vaers"
#> [171] "vaersND"
#> [172] "minet"
#> [173] "AnomalyDetection"
#> [174] "BreakoutDetection"
#> [175] "zdat"
Enhances
# Entries of `enhances` that do not appear in `pkg_names`.
enhances[!(enhances %in% pkg_names)]
#> [1] "asreml" "rPython" "doMC"
#> [4] "synchronicity" "rkward" "INLA"
#> [7] "dggrids" "its" "graph"
#> [10] "gurobi" "BiocInstaller" "pbdCS"
#> [13] "KEGG.db" "org.Hs.eg.db" "org.Sc.sgd.db"
#> [16] "org.Ag.eg.db" "org.Pt.eg.db" "org.Rn.eg.db"
#> [19] "org.Ss.eg.db" "org.At.tair.db" "org.Bt.eg.db"
#> [22] "org.Ce.eg.db" "org.Cf.eg.db" "org.Dm.eg.db"
#> [25] "org.Dr.eg.db" "org.EcK12.eg.db" "org.EcSakai.eg.db"
#> [28] "org.Gg.eg.db" "org.Mm.eg.db" "org.Mmu.eg.db"
#> [31] "org.Pf.plasmo.db" "org.Xl.eg.db" "rjpod"
#> [34] "kmndirs"
感想
まあやっぱBioconductorだよなーという感じでした。
Bioconductorのパッケージもavailable.packages("http://bioconductor.org/packages/3.5/bioc/src/contrib")(バージョン名はよくわからない)とかで一覧が取得できるみたいなんですが、一部取れないやつがあってよくわかりませんでした。