Software: R version 3.6.1

# Prerequisites
# Packages that must be attached before the models can be loaded and applied.
loadpkg <- c("xgboost", "data.table")
suppressPackageStartupMessages({
  for (pkg in loadpkg) {
    # Install the package only when it is not already available.
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
    }
    # library() stops with an error if the package still cannot be attached,
    # whereas require() would only return FALSE and let the script continue.
    library(pkg, character.only = TRUE)
  }
})

Alcoholic liver disease

# load models 
# Restore the saved model object: load() returns the name of the object it
# restored from the .RData file, and get() fetches the object by that name.
load_model <- get(x = load(file = "ald_model.RData"))
# Feature information (conventional and microbial features)
feature.info <- readRDS(file = "ald_feature_info.rds")
# Formatted predictor names expected by the models
predictor.names <- readRDS(file = "ald_predictor_names.rds")

The following conventional features are required. Categorical features include sex and smoking status.

# Print the names of the required conventional features, taken from column V1
# of feature.info$conventional.feature (presumably a data.table -- confirm).
feature.info$conventional.feature[,V1]
##  [1] "sex"                                 
##  [2] "age"                                 
##  [3] "body mass index"                     
##  [4] "waist-to-hip ratio"                  
##  [5] "alcohol consumption"                 
##  [6] "smoking status"                      
##  [7] "triglycerides"                       
##  [8] "low-density lipoprotein cholesterol" 
##  [9] "HIGH-density lipoprotein cholesterol"
## [10] "Gamma-glutamyl transferase"

The following microbial features are needed. More information about GTDB taxonomy can be found at https://gtdb.ecogenomic.org.

# Compactly display the structure of the microbial feature list; the output
# below shows a list of 10 character vectors of taxon names.
str(feature.info$microbial.feature) 
## List of 10
##  $ : chr [1:109] "Absiella sp000163515" "Acetivibrio A ethanolgignens" "Agathobacter sp900317585" "Agathobaculum sp900291975" ...
##  $ : chr [1:128] " 14 2 sp001940225" "Agathobacter sp900317585" "An200 sp002160025" "Anaeromassilibacillus sp002159845" ...
##  $ : chr [1:276] " 14 2 sp001940225" "Acinetobacter baumannii" "Actinomyces graevenitzii" "Adlercreutzia caecimuris" ...
##  $ : chr [1:229] "Absiella innocuum" "Absiella sp000165065" "Acetatifactor muris" "Acinetobacter baumannii" ...
##  $ : chr [1:263] " 14 2 sp001940225" "Acetatifactor muris" "Actinomyces graevenitzii" "Acutalibacter sp000435395" ...
##  $ : chr [1:262] " 14 2 sp001940225" " 43 108 sp001915545" "Absiella innocuum" "Acetivibrio A ethanolgignens" ...
##  $ : chr [1:304] " 43 108 sp001915545" "Absiella dolichum" "Acetatifactor muris" "Acinetobacter baumannii" ...
##  $ : chr [1:288] " 14 2 sp001940225" "Acetatifactor muris" "Acinetobacter baumannii" "Actinomyces graevenitzii" ...
##  $ : chr [1:332] "Absiella sp000163515" "Absiella sp000165065" "Acetatifactor muris" "Acinetobacter baumannii" ...
##  $ : chr [1:248] " 14 2 sp001940225" "Absiella dolichum" "Actinomyces graevenitzii" "Acutalibacter sp000435395" ...

To apply the models to new data, the feature names of the corresponding dataset need to be formatted as follows.

# Preview the first entries (head() shows six by default) of each element of
# predictor.names to illustrate the expected feature-name format.
lapply(predictor.names, head)
## [[1]]
## [1] "Bifidobacterium_sp002742445" "Butyricicoccus_A_porcorum"  
## [3] "Prevotella_sp900318795"      "Butyricimonas_virosa"       
## [5] "Dorea_formicigenerans"       "CAG_273_sp000438355"        
## 
## [[2]]
## [1] "ER4_sp002437735"           "Desulfovibrio_piger"      
## [3] "Parabacteroides_johnsonii" "CAG_475_sp002449145"      
## [5] "Butyricicoccus_A_porcorum" "CAG_83_sp000435975"       
## 
## [[3]]
## [1] "Desulfovibrio_piger"       "CAG_462_sp003489705"      
## [3] "Dakarella_massiliensis"    "CAG_1138_sp000434675"     
## [5] "Prevotella_sp002297965"    "Lactobacillus_H_fermentum"
## 
## [[4]]
## [1] "CAG_448_sp003150135"        "C941_sp900318865"          
## [3] "CAG_462_sp003489705"        "RC9_sp002393945"           
## [5] "Intestinibacter_bartlettii" "Firm_10_sp001603025"       
## 
## [[5]]
## [1] "CAG_448_sp003150135"       "RC9_sp002393945"          
## [3] "Desulfovibrio_piger"       "UBA11524_sp000437595"     
## [5] "CAG_273_sp000437855"       "Lactobacillus_H_fermentum"
## 
## [[6]]
## [1] "CAG_462_sp003489705"        "CAG_448_sp003150135"       
## [3] "Desulfovibrio_piger"        "Lactobacillus_H_fermentum" 
## [5] "Intestinibacter_bartlettii" "TF01_11_sp003524945"       
## 
## [[7]]
## [1] "UBA11524_sp000437595"      "CAG_462_sp003489705"      
## [3] "CAG_448_sp003150135"       "Prevotella_sp900318795"   
## [5] "Desulfovibrio_piger"       "Lactobacillus_H_fermentum"
## 
## [[8]]
## [1] "CAG_448_sp003150135"        "UBA11524_sp000437595"      
## [3] "CAG_462_sp003489705"        "Eubacterium_E_hallii_A"    
## [5] "Intestinibacter_bartlettii" "Faecalicatena_torques"     
## 
## [[9]]
## [1] "CAG_448_sp003150135"   "CAG_462_sp003489705"   "Butyricimonas_virosa" 
## [4] "Desulfovibrio_piger"   "Faecalicatena_torques" "Agathobacter_rectale" 
## 
## [[10]]
## [1] "RC9_sp002393945"        "Dakarella_massiliensis"
## [3] "CAG_273_sp000437855"    "CAG_83_sp000435975"    
## [5] "C941_sp900318865"       "UBA11524_sp000437595"

Example of applying a model to new data that contains the required features.

# Take the first of the loaded models and pass it through
# xgb.Booster.complete() before using it for prediction.
model <- xgb.Booster.complete(object = load_model[[1]])
# `newdata` is supplied by the user: an xgb.DMatrix object whose features are
# formatted as in predictor.names[[1]].
pred <- predict(object = model, newdata = newdata)

Any liver disease

# load models 
# Restore the saved model object: load() returns the name of the object it
# restored from the .RData file, and get() fetches the object by that name.
load_model <- get(x = load(file = "ld_model.RData"))
# Feature information (conventional and microbial features)
feature.info <- readRDS(file = "ld_feature_info.rds")
# Formatted predictor names expected by the models
predictor.names <- readRDS(file = "ld_predictor_names.rds")

The following conventional features are required. Categorical features include sex and smoking status.

# Print the names of the required conventional features, taken from column V1
# of feature.info$conventional.feature (presumably a data.table -- confirm).
feature.info$conventional.feature[,V1]
##  [1] "sex"                                 
##  [2] "age"                                 
##  [3] "body mass index"                     
##  [4] "waist-to-hip ratio"                  
##  [5] "alcohol consumption"                 
##  [6] "smoking status"                      
##  [7] "triglycerides"                       
##  [8] "low-density lipoprotein cholesterol" 
##  [9] "HIGH-density lipoprotein cholesterol"
## [10] "Gamma-glutamyl transferase"

The following microbial features are needed. More information about GTDB taxonomy can be found at https://gtdb.ecogenomic.org.

# Compactly display the structure of the microbial feature list; the output
# below shows a list of 10 character vectors of taxon names.
str(feature.info$microbial.feature) 
## List of 10
##  $ : chr [1:160] "Absiella sp000163515" "Absiella sp000165065" "Acetatifactor sp900066365" "Actinotalea carbonis" ...
##  $ : chr [1:112] "Absiella sp000165065" "Actinomyces graevenitzii" "Actinotalea carbonis" "Agathobaculum butyriciproducens" ...
##  $ : chr [1:157] "Absiella sp000165065" "Acetatifactor sp003447295" "Acetatifactor sp900066365" "Acetatifactor sp900066565" ...
##  $ : chr [1:59] "Absiella sp000163515" "Acetatifactor sp900066365" "Actinomyces graevenitzii" "Agathobaculum butyriciproducens" ...
##  $ : chr [1:195] " 51 20 sp001917175" "Absiella innocuum" "Absiella sp000165065" "Acetatifactor sp003447295" ...
##  $ : chr [1:143] "Acetatifactor sp900066365" "Actinomyces graevenitzii" "Agathobaculum butyriciproducens" "An200 sp002160025" ...
##  $ : chr [1:209] "Absiella innocuum" "Absiella sp000165065" "Acetatifactor sp003447295" "Acetatifactor sp900066365" ...
##  $ : chr [1:45] " 43 108 sp001915545" "Acetatifactor sp900066365" "Acetatifactor sp900066565" "Acutalibacter sp000432995" ...
##  $ : chr [1:247] "Absiella innocuum" "Absiella sp000163515" "Absiella sp000165065" "Acetatifactor muris" ...
##  $ : chr [1:92] "Absiella sp000165065" "Acetatifactor sp900066365" "Actinomyces graevenitzii" "Anaerotignum sp000436415" ...

To apply the models to new data, the feature names of the corresponding dataset need to be formatted as follows.

# Preview the first entries (head() shows six by default) of each element of
# predictor.names to illustrate the expected feature-name format.
lapply(predictor.names, head)
## [[1]]
## [1] "CAG_269_sp003518755"         "QALS01_sp003150575"         
## [3] "Acetatifactor_sp900066365"   "Marseille_P4683_sp900232885"
## [5] "Dakarella_massiliensis"      "CAG_1427_sp000435475"       
## 
## [[2]]
## [1] "CAG_448_sp003150135"        "ER4_sp002437735"           
## [3] "Streptococcus_sp000187445"  "UCG_010_sp003150115"       
## [5] "UBA3263_sp001689615"        "Ruminococcus_C_sp000437175"
## 
## [[3]]
## [1] "UBA5905_sp002431285"       "Streptococcus_sp000187445"
## [3] "Sharpea_azabuensis"        "CAG_269_sp001915995"      
## [5] "Lactobacillus_H_fermentum" "CAG_448_sp003150135"      
## 
## [[4]]
## [1] "Faecalicatena_torques"           "Parabacteroides_johnsonii"      
## [3] "Streptococcus_sp000187445"       "UBA5905_sp002431285"            
## [5] "Agathobaculum_butyriciproducens" "Marseille_P4683_sp900232885"    
## 
## [[5]]
## [1] "CAG_460_sp000437355"         "UBA9475_sp002161235"        
## [3] "Acetatifactor_sp900066365"   "Marseille_P4683_sp900232885"
## [5] "Anaerotignum_sp000436415"    "Dorea_sp900066765"          
## 
## [[6]]
## [1] "Acetatifactor_sp900066365" "UBA11471_sp000434215"     
## [3] "Anaerotignum_sp000436415"  "UBA4717_sp002404395"      
## [5] "UBA5905_sp002431285"       "UBA1375_sp002305795"      
## 
## [[7]]
## [1] "Streptococcus_sp000187445"  "Ruminococcus_A_sp000432335"
## [3] "Lactobacillus_H_fermentum"  "Dakarella_massiliensis"    
## [5] "Parabacteroides_johnsonii"  "CAG_110_sp900315595"       
## 
## [[8]]
## [1] "Lactobacillus_H_fermentum"  "Acetatifactor_sp900066365" 
## [3] "Ruminococcus_C_sp000437175" "Ruminococcus_A_sp000432335"
## [5] "Alistipes_sp900021155"      "taxa_43_108_sp001915545"   
## 
## [[9]]
## [1] "CAG_83_sp000435975"        "CAG_95_sp000438155"       
## [3] "Prevotella_sp900315565"    "UC5_1_2E3_sp001304875"    
## [5] "Porphyromonas_somerae"     "Lactobacillus_H_fermentum"
## 
## [[10]]
## [1] "CAG_110_sp000435995"        "Acetatifactor_sp900066365" 
## [3] "Streptococcus_sp000187445"  "Prevotella_maculosa"       
## [5] "Ruminococcus_D_sp000434695" "CAG_448_sp003150135"

Example of applying a model to new data that contains the required features.

# Take the first of the loaded models and pass it through
# xgb.Booster.complete() before using it for prediction.
model <- xgb.Booster.complete(object = load_model[[1]])
# `newdata` is supplied by the user: an xgb.DMatrix object whose features are
# formatted as in predictor.names[[1]].
pred <- predict(object = model, newdata = newdata)