Selection of optimal parameters

Coral

WGCNA: Define thresholding power

Library

library(WGCNA)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /yshare1/home/toru/anaconda3/envs/wgcna/lib/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=ja_JP.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=ja_JP.UTF-8        LC_COLLATE=ja_JP.UTF-8    
##  [5] LC_MONETARY=ja_JP.UTF-8    LC_MESSAGES=ja_JP.UTF-8   
##  [7] LC_PAPER=ja_JP.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=ja_JP.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] WGCNA_1.69            fastcluster_1.1.25    dynamicTreeCut_1.63-1
## [4] rmarkdown_1.12       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1            lattice_0.20-38       GO.db_3.10.0         
##  [4] png_0.1-7             digest_0.6.18         foreach_1.5.0        
##  [7] R6_2.4.1              backports_1.1.7       acepack_1.4.1        
## [10] stats4_3.6.1          RSQLite_2.2.0         evaluate_0.13        
## [13] ggplot2_3.3.1         pillar_1.4.4          rlang_0.4.6          
## [16] data.table_1.12.8     rstudioapi_0.11       blob_1.2.1           
## [19] S4Vectors_0.24.4      rpart_4.1-15          Matrix_1.2-17        
## [22] preprocessCore_1.48.0 checkmate_2.0.0       splines_3.6.1        
## [25] stringr_1.4.0         foreign_0.8-71        htmlwidgets_1.5.1    
## [28] bit_1.1-15.2          munsell_0.5.0         compiler_3.6.1       
## [31] xfun_0.6              pkgconfig_2.0.3       BiocGenerics_0.32.0  
## [34] base64enc_0.1-3       htmltools_0.3.6       nnet_7.3-12          
## [37] tibble_3.0.1          gridExtra_2.3         htmlTable_1.13.3     
## [40] Hmisc_4.4-0           IRanges_2.20.2        codetools_0.2-16     
## [43] matrixStats_0.56.0    crayon_1.3.4          grid_3.6.1           
## [46] gtable_0.3.0          lifecycle_0.2.0       DBI_1.1.0            
## [49] magrittr_1.5          scales_1.1.1          impute_1.60.0        
## [52] stringi_1.4.3         doParallel_1.0.15     latticeExtra_0.6-29  
## [55] ellipsis_0.3.1        vctrs_0.3.0           Formula_1.2-3        
## [58] RColorBrewer_1.1-2    iterators_1.0.12      tools_3.6.1          
## [61] bit64_0.9-7           Biobase_2.46.0        glue_1.3.1           
## [64] jpeg_0.1-8.1          parallel_3.6.1        survival_3.1-12      
## [67] yaml_2.2.0            AnnotationDbi_1.48.0  colorspace_1.4-1     
## [70] cluster_2.0.8         memoise_1.1.0         knitr_1.22

Load files

# matFPKM.c : Gene expression table (FPKM level) of corals
# matTPM.c : Gene expression table (TPM level) of corals
# meta.c : Metadata of coral samples
load("Expression_Coral.UT.RData")

Preprocessing

Remove low-expressed genes

以下のページを参考に低発現遺伝子を除外する。ここではFPKM>1で5割のサンプルから検出されている遺伝子を選んだ。

Reference: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html
4. Can WGCNA be used to analyze RNA-Seq data?
We suggest removing features whose counts are consistently low (for example, removing all features that have a count of less than say 10 in more than 90% of the samples) because such low-expressed features tend to reflect noise and correlations based on counts that are mostly zero aren't really meaningful. 
n_detected = rowSums(matFPKM.c > 1)
detected = (n_detected / ncol(matFPKM.c) > 0.5)
table(detected) 
## detected
## FALSE  TRUE 
## 23321 16699
mat.c = matFPKM.c[detected,] # choose mostly-detected genes
sum(apply(mat.c, 1, var) == 0) # check whether zero-variance gene is found
## [1] 0

Log2-transformation

mat.c = log2(mat.c + 1)
mat.c = t(mat.c) # transpose

WGCNA: Choose adequate soft-thresholding power

Reference: Step-by-step network construction and module detection
  https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-man.pdf
powers = 1:20
sft.c = pickSoftThreshold(mat.c, 
                          powerVector = powers, 
                          verbose = 5,
                          networkType = "signed") # Signed network analysis
## pickSoftThreshold: will use block size 2679.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 2679 of 16699
## Warning: executing %dopar% sequentially: no parallel backend registered
##    ..working on genes 2680 through 5358 of 16699
##    ..working on genes 5359 through 8037 of 16699
##    ..working on genes 8038 through 10716 of 16699
##    ..working on genes 10717 through 13395 of 16699
##    ..working on genes 13396 through 16074 of 16699
##    ..working on genes 16075 through 16699 of 16699
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1  0.63600  6.140          0.933  8780.0   8830.00  10100
## 2      2  0.00754  0.254          0.907  4920.0   4870.00   6650
## 3      3  0.36700 -1.600          0.862  2900.0   2780.00   4710
## 4      4  0.54000 -1.930          0.850  1790.0   1640.00   3490
## 5      5  0.64900 -2.110          0.862  1150.0    994.00   2660
## 6      6  0.72600 -2.180          0.890   758.0    618.00   2090
## 7      7  0.73700 -2.240          0.887   516.0    399.00   1670
## 8      8  0.75800 -2.220          0.900   360.0    264.00   1360
## 9      9  0.76500 -2.210          0.906   257.0    179.00   1120
## 10    10  0.77400 -2.210          0.914   187.0    124.00    930
## 11    11  0.79600 -2.180          0.931   139.0     86.60    781
## 12    12  0.79200 -2.190          0.932   105.0     61.50    661
## 13    13  0.79800 -2.180          0.940    79.9     44.30    564
## 14    14  0.81000 -2.160          0.950    61.8     32.20    484
## 15    15  0.81700 -2.140          0.957    48.4     23.70    417
## 16    16  0.82500 -2.130          0.964    38.3     17.60    362
## 17    17  0.82900 -2.110          0.968    30.6     13.20    315
## 18    18  0.82800 -2.100          0.967    24.6     10.00    275
## 19    19  0.83300 -2.080          0.970    20.0      7.65    241
## 20    20  0.83600 -2.060          0.973    16.4      5.88    212
sft.c
## $powerEstimate
## [1] NA
## 
## $fitIndices
##    Power    SFT.R.sq      slope truncated.R.sq    mean.k.   median.k.
## 1      1 0.636015761  6.1394748      0.9334311 8775.05249 8831.283111
## 2      2 0.007537047  0.2535258      0.9069594 4915.12183 4872.381459
## 3      3 0.367034640 -1.5954669      0.8619967 2900.49641 2782.560107
## 4      4 0.539533308 -1.9299624      0.8503500 1788.50077 1639.883356
## 5      5 0.649046240 -2.1088255      0.8622259 1145.18619  993.997367
## 6      6 0.725569395 -2.1803677      0.8897642  757.66787  617.571823
## 7      7 0.737363653 -2.2369381      0.8873196  515.85393  398.523326
## 8      8 0.758149710 -2.2194124      0.8996561  360.19136  264.274470
## 9      9 0.764659329 -2.2141017      0.9055804  257.17822  179.293912
## 10    10 0.773828696 -2.2096717      0.9138145  187.30323  123.753766
## 11    11 0.795795432 -2.1807600      0.9306162  138.84565   86.618236
## 12    12 0.792022423 -2.1922162      0.9322456  104.56527   61.483275
## 13    13 0.798088499 -2.1778465      0.9399064   79.87501   44.279191
## 14    14 0.809760349 -2.1570168      0.9500571   61.80090   32.246538
## 15    15 0.816984246 -2.1439052      0.9573274   48.37382   23.720143
## 16    16 0.824704730 -2.1282369      0.9638006   38.26456   17.609821
## 17    17 0.828549255 -2.1123552      0.9678029   30.55985   13.201554
## 18    18 0.827928825 -2.1004325      0.9671891   24.62187   10.005280
## 19    19 0.833130591 -2.0810743      0.9702946   19.99846    7.652881
## 20    20 0.836086602 -2.0611295      0.9725581   16.36459    5.881468
##        max.k.
## 1  10079.1895
## 2   6652.2951
## 3   4710.6018
## 4   3485.4486
## 5   2664.3989
## 6   2088.5624
## 7   1669.9980
## 8   1356.8687
## 9   1117.0422
## 10   929.7508
## 11   781.0868
## 12   661.4486
## 13   564.0304
## 14   483.8962
## 15   417.3943
## 16   361.7750
## 17   314.9358
## 18   275.2474
## 19   241.4312
## 20   212.4739
save(mat.c, sft.c, file="2.1.c_WGCNA_ThresholdingPower.UT.RData")

Zooxanthellae

WGCNA: Define thresholding power

Library

library(WGCNA)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /yshare1/home/toru/anaconda3/envs/wgcna/lib/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=ja_JP.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=ja_JP.UTF-8        LC_COLLATE=ja_JP.UTF-8    
##  [5] LC_MONETARY=ja_JP.UTF-8    LC_MESSAGES=ja_JP.UTF-8   
##  [7] LC_PAPER=ja_JP.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=ja_JP.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] WGCNA_1.69            fastcluster_1.1.25    dynamicTreeCut_1.63-1
## [4] rmarkdown_1.12       
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1            lattice_0.20-38       GO.db_3.10.0         
##  [4] png_0.1-7             digest_0.6.18         foreach_1.5.0        
##  [7] R6_2.4.1              backports_1.1.7       acepack_1.4.1        
## [10] stats4_3.6.1          RSQLite_2.2.0         evaluate_0.13        
## [13] ggplot2_3.3.1         pillar_1.4.4          rlang_0.4.6          
## [16] data.table_1.12.8     rstudioapi_0.11       blob_1.2.1           
## [19] S4Vectors_0.24.4      rpart_4.1-15          Matrix_1.2-17        
## [22] preprocessCore_1.48.0 checkmate_2.0.0       splines_3.6.1        
## [25] stringr_1.4.0         foreign_0.8-71        htmlwidgets_1.5.1    
## [28] bit_1.1-15.2          munsell_0.5.0         compiler_3.6.1       
## [31] xfun_0.6              pkgconfig_2.0.3       BiocGenerics_0.32.0  
## [34] base64enc_0.1-3       htmltools_0.3.6       nnet_7.3-12          
## [37] tibble_3.0.1          gridExtra_2.3         htmlTable_1.13.3     
## [40] Hmisc_4.4-0           IRanges_2.20.2        codetools_0.2-16     
## [43] matrixStats_0.56.0    crayon_1.3.4          grid_3.6.1           
## [46] gtable_0.3.0          lifecycle_0.2.0       DBI_1.1.0            
## [49] magrittr_1.5          scales_1.1.1          impute_1.60.0        
## [52] stringi_1.4.3         doParallel_1.0.15     latticeExtra_0.6-29  
## [55] ellipsis_0.3.1        vctrs_0.3.0           Formula_1.2-3        
## [58] RColorBrewer_1.1-2    iterators_1.0.12      tools_3.6.1          
## [61] bit64_0.9-7           Biobase_2.46.0        glue_1.3.1           
## [64] jpeg_0.1-8.1          parallel_3.6.1        survival_3.1-12      
## [67] yaml_2.2.0            AnnotationDbi_1.48.0  colorspace_1.4-1     
## [70] cluster_2.0.8         memoise_1.1.0         knitr_1.22

Load files

# matFPKM.z : Gene expression table (FPKM level) of zooxanthellae
# matTPM.z : Gene expression table (TPM level) of zooxanthellae
load("Expression.UT.RData")

Preprocessing

Remove low-expressed genes

以下のページを参考に低発現遺伝子を除外する。ここではFPKM>1で5割のサンプルから検出されている遺伝子を選んだ。

Reference: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html
4. Can WGCNA be used to analyze RNA-Seq data?
We suggest removing features whose counts are consistently low (for example, removing all features that have a count of less than say 10 in more than 90% of the samples) because such low-expressed features tend to reflect noise and correlations based on counts that are mostly zero aren't really meaningful. 
n_detected = rowSums(matFPKM.z > 1)
detected = (n_detected / ncol(matFPKM.z) > 0.5)
table(detected) 
## detected
## FALSE  TRUE 
##  6320 39284
mat.z = matFPKM.z[detected,] # choose mostly-detected genes
sum(apply(mat.z, 1, var) == 0) # check whether zero-variance gene is found
## [1] 0

Log2-transformation

mat.z = log2(mat.z + 1)
mat.z = t(mat.z) # transpose

WGCNA: Choose adequate soft-thresholding power

Reference: Step-by-step network construction and module detection
  https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/FemaleLiver-02-networkConstr-man.pdf
powers = 1:30
sft.z = pickSoftThreshold(mat.z, 
                          powerVector = powers, 
                          verbose = 5,
                          networkType = "signed") # Signed network analysis
## pickSoftThreshold: will use block size 1138.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1138 of 39284
## Warning: executing %dopar% sequentially: no parallel backend registered
##    ..working on genes 1139 through 2276 of 39284
##    ..working on genes 2277 through 3414 of 39284
##    ..working on genes 3415 through 4552 of 39284
##    ..working on genes 4553 through 5690 of 39284
##    ..working on genes 5691 through 6828 of 39284
##    ..working on genes 6829 through 7966 of 39284
##    ..working on genes 7967 through 9104 of 39284
##    ..working on genes 9105 through 10242 of 39284
##    ..working on genes 10243 through 11380 of 39284
##    ..working on genes 11381 through 12518 of 39284
##    ..working on genes 12519 through 13656 of 39284
##    ..working on genes 13657 through 14794 of 39284
##    ..working on genes 14795 through 15932 of 39284
##    ..working on genes 15933 through 17070 of 39284
##    ..working on genes 17071 through 18208 of 39284
##    ..working on genes 18209 through 19346 of 39284
##    ..working on genes 19347 through 20484 of 39284
##    ..working on genes 20485 through 21622 of 39284
##    ..working on genes 21623 through 22760 of 39284
##    ..working on genes 22761 through 23898 of 39284
##    ..working on genes 23899 through 25036 of 39284
##    ..working on genes 25037 through 26174 of 39284
##    ..working on genes 26175 through 27312 of 39284
##    ..working on genes 27313 through 28450 of 39284
##    ..working on genes 28451 through 29588 of 39284
##    ..working on genes 29589 through 30726 of 39284
##    ..working on genes 30727 through 31864 of 39284
##    ..working on genes 31865 through 33002 of 39284
##    ..working on genes 33003 through 34140 of 39284
##    ..working on genes 34141 through 35278 of 39284
##    ..working on genes 35279 through 36416 of 39284
##    ..working on genes 36417 through 37554 of 39284
##    ..working on genes 37555 through 38692 of 39284
##    ..working on genes 38693 through 39284 of 39284
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      1  0.95000  4.3100          0.953 24500.0   25400.0  29000
## 2      2  0.82800  1.6400          0.783 16200.0   17100.0  22600
## 3      3  0.40300  0.6380          0.432 11200.0   11700.0  18100
## 4      4  0.00667  0.0669          0.248  7940.0    8230.0  14700
## 5      5  0.10800 -0.2940          0.440  5780.0    5850.0  12200
## 6      6  0.29500 -0.5630          0.607  4300.0    4220.0  10200
## 7      7  0.41600 -0.7530          0.701  3250.0    3070.0   8640
## 8      8  0.49900 -0.9040          0.765  2500.0    2250.0   7380
## 9      9  0.55800 -1.0200          0.810  1950.0    1670.0   6350
## 10    10  0.60700 -1.1100          0.844  1530.0    1250.0   5510
## 11    11  0.63900 -1.2100          0.864  1220.0     939.0   4800
## 12    12  0.66500 -1.2800          0.881   981.0     711.0   4210
## 13    13  0.68000 -1.3500          0.891   795.0     541.0   3700
## 14    14  0.70100 -1.4000          0.906   649.0     415.0   3270
## 15    15  0.71400 -1.4500          0.913   534.0     320.0   2900
## 16    16  0.72500 -1.5000          0.921   442.0     248.0   2590
## 17    17  0.74000 -1.5200          0.931   368.0     193.0   2310
## 18    18  0.75200 -1.5500          0.939   309.0     151.0   2070
## 19    19  0.76800 -1.5700          0.948   260.0     118.0   1860
## 20    20  0.77700 -1.5900          0.953   220.0      93.3   1670
## 21    21  0.78500 -1.6100          0.958   187.0      74.0   1510
## 22    22  0.79300 -1.6200          0.963   159.0      58.9   1370
## 23    23  0.80000 -1.6400          0.967   137.0      47.1   1240
## 24    24  0.80500 -1.6600          0.969   117.0      37.9   1130
## 25    25  0.81400 -1.6600          0.974   101.0      30.5   1030
## 26    26  0.82200 -1.6700          0.976    87.7      24.6    937
## 27    27  0.82900 -1.6800          0.979    76.1      20.0    856
## 28    28  0.83400 -1.6900          0.980    66.3      16.3    784
## 29    29  0.83700 -1.6900          0.983    57.9      13.3    718
## 30    30  0.84000 -1.7000          0.984    50.7      10.9    661
sft.z
## $powerEstimate
## [1] 1
## 
## $fitIndices
##    Power    SFT.R.sq       slope truncated.R.sq     mean.k.   median.k.
## 1      1 0.950355436  4.31405293      0.9531192 24503.85086 25422.30150
## 2      2 0.827543211  1.63718305      0.7834519 16205.78944 17054.48510
## 3      3 0.403033472  0.63808513      0.4322007 11165.88150 11735.93827
## 4      4 0.006671372  0.06686317      0.2481058  7935.96869  8228.70388
## 5      5 0.108426753 -0.29352353      0.4397809  5782.13257  5851.77908
## 6      6 0.294731172 -0.56257169      0.6068116  4300.39997  4217.68996
## 7      7 0.415813355 -0.75250993      0.7011965  3254.73272  3067.39583
## 8      8 0.499315567 -0.90399401      0.7654386  2500.80099  2252.32583
## 9      9 0.558166370 -1.01932530      0.8101000  1947.09857  1669.86735
## 10    10 0.606907497 -1.11362015      0.8441644  1533.85221  1247.37269
## 11    11 0.639389080 -1.20684654      0.8643031  1221.01785   938.57902
## 12    12 0.665027881 -1.27870865      0.8814067   981.17651   711.38180
## 13    13 0.679616508 -1.34944879      0.8907980   795.19077   541.43573
## 14    14 0.700946489 -1.40230616      0.9057551   649.47429   415.40850
## 15    15 0.714348448 -1.45357032      0.9131524   534.23352   319.80937
## 16    16 0.724948163 -1.49558670      0.9205157   442.31096   247.88717
## 17    17 0.739620984 -1.52325511      0.9305581   368.40984   192.93024
## 18    18 0.752313451 -1.55056501      0.9389893   308.56523   150.95174
## 19    19 0.767718674 -1.57004084      0.9481615   259.77786   118.05191
## 20    20 0.776584732 -1.59281747      0.9528751   219.75689    93.29782
## 21    21 0.784670435 -1.61059715      0.9577007   186.73681    73.99915
## 22    22 0.792617962 -1.62322632      0.9628757   159.34562    58.93977
## 23    23 0.800169526 -1.64007420      0.9669684   136.50889    47.12940
## 24    24 0.805218125 -1.65573482      0.9693642   117.37905    37.91399
## 25    25 0.813842797 -1.66206086      0.9737209   101.28305    30.51697
## 26    26 0.821983494 -1.67186736      0.9764177    87.68295    24.64757
## 27    27 0.828877703 -1.67945788      0.9789746    76.14628    20.01177
## 28    28 0.833878051 -1.68672369      0.9801789    66.32341    16.30415
## 29    29 0.836735192 -1.69244809      0.9827587    57.93014    13.34331
## 30    30 0.839642049 -1.69896991      0.9842438    50.73436    10.94009
##        max.k.
## 1  29008.7163
## 2  22564.7847
## 3  18059.9742
## 4  14735.8653
## 5  12199.6187
## 6  10218.0438
## 7   8641.3106
## 8   7377.8395
## 9   6353.1069
## 10  5506.1704
## 11  4799.5795
## 12  4205.1902
## 13  3701.5037
## 14  3271.8625
## 15  2903.1993
## 16  2585.1492
## 17  2309.4092
## 18  2069.2687
## 19  1859.2596
## 20  1674.8933
## 21  1512.4600
## 22  1368.8744
## 23  1241.5555
## 24  1128.3325
## 25  1027.3708
## 26   937.1119
## 27   856.2268
## 28   783.5770
## 29   718.1836
## 30   660.5107
save(mat.z, sft.z, file="2.1.z_WGCNA_ThresholdingPower.UT.RData")