问答文章1 问答文章501 问答文章1001 问答文章1501 问答文章2001 问答文章2501 问答文章3001 问答文章3501 问答文章4001 问答文章4501 问答文章5001 问答文章5501 问答文章6001 问答文章6501 问答文章7001 问答文章7501 问答文章8001 问答文章8501 问答文章9001 问答文章9501

如何在r语言中使用 lightgbm

发布网友 发布时间:2022-05-15 09:07

我来回答

1个回答

热心网友 时间:2023-10-17 00:48

本文用到的处理二值数据的方法,有以下两种:
glm(generalized boosted models)
glmnet(generalized linear models)
glm使用了boosted trees,glmnet使用了regression
# load libraries
library(caret)
library(pROC)

#################################################
# data prep
#################################################

# load the tab-separated Titanic passenger list
titanicDF <- read.csv('http://math.ucdenver.edu/RTutorial/titanic.txt', sep = '\t')

# derive a coarse Title feature from the Name column; the first matching
# pattern wins, and anything that matches none of them becomes 'Nothing'
titanicDF$Title <- ifelse(
  grepl('Mr ', titanicDF$Name), 'Mr',
  ifelse(
    grepl('Mrs ', titanicDF$Name), 'Mrs',
    ifelse(grepl('Miss', titanicDF$Name), 'Miss', 'Nothing')
  )
)

# fill missing ages with the median of the observed ages
titanicDF$Age[is.na(titanicDF$Age)] <- median(titanicDF$Age, na.rm = TRUE)

# miso format: keep the predictor columns first and the outcome column last
titanicDF <- titanicDF[c('PClass', 'Age', 'Sex', 'Title', 'Survived')]

# dummy variables for factors/characters
titanicDF$Title <- as.factor(titanicDF$Title)
# fullRank = FALSE keeps one indicator column per level (no reference level dropped)
titanicDummy <- dummyVars("~.", data = titanicDF, fullRank = FALSE)
titanicDF <- as.data.frame(predict(titanicDummy, titanicDF))
print(names(titanicDF))

# what is the proportion of your outcome variable?
prop.table(table(titanicDF$Survived))

# save the numeric outcome for the glmnet model; the gbm section overwrites
# Survived with class labels and the glmnet section restores it from here
tempOutcome <- titanicDF$Survived

# generalize outcome and predictor variables
outcomeName <- 'Survived'
predictorsNames <- names(titanicDF)[names(titanicDF) != outcomeName]

#################################################
# model it
#################################################
# get names of all caret supported models
names(getModelInfo())

# recode the 0/1 outcome as class labels so train() fits a classifier
# and can report class probabilities under these names
titanicDF$Survived <- ifelse(titanicDF$Survived == 1, 'yes', 'nope')

# pick model gbm and find out what type of model it is
getModelInfo()$gbm$type

# split data into training and testing chunks (75% / 25%)
set.seed(1234)
splitIndex <- createDataPartition(titanicDF[, outcomeName], p = .75, list = FALSE, times = 1)
trainDF <- titanicDF[ splitIndex, ]
testDF <- titanicDF[-splitIndex, ]

# create caret trainControl object to control the number of cross-validations performed;
# twoClassSummary + classProbs = TRUE are what make the "ROC" metric available below
objControl <- trainControl(method = 'cv', number = 3, returnResamp = 'none',
                           summaryFunction = twoClassSummary, classProbs = TRUE)

# run model
objModel <- train(trainDF[, predictorsNames], as.factor(trainDF[, outcomeName]),
                  method = 'gbm',
                  trControl = objControl,
                  metric = "ROC",
                  preProc = c("center", "scale"))

# find out variable importance
summary(objModel)

# find out model details
objModel

#################################################
# evaluate model
#################################################
# get predictions on your testing data

# class prediction: type = 'raw' returns the predicted class labels
predictions <- predict(object = objModel, testDF[, predictorsNames], type = 'raw')
head(predictions)
postResample(pred = predictions, obs = as.factor(testDF[, outcomeName]))

# probabilities: type = 'prob' returns one column per class label
predictions <- predict(object = objModel, testDF[, predictorsNames], type = 'prob')
head(predictions)

# score the probability of the 'yes' class against the 0/1 outcome;
# postResample needs two numeric vectors here, not the whole probability table
observedYes <- ifelse(testDF[, outcomeName] == "yes", 1, 0)
postResample(pred = predictions[["yes"]], obs = observedYes)

auc <- roc(observedYes, predictions[["yes"]])
print(auc$auc)

################################################
# glmnet model
################################################

# pick model glmnet and find out what type of model it is
getModelInfo()$glmnet$type

# restore the numeric outcome saved earlier for the glmnet model
titanicDF$Survived <- tempOutcome

# split data into training and testing chunks (75% / 25%)
set.seed(1234)
splitIndex <- createDataPartition(titanicDF[, outcomeName], p = .75, list = FALSE, times = 1)
trainDF <- titanicDF[ splitIndex, ]
testDF <- titanicDF[-splitIndex, ]

# create caret trainControl object to control the number of cross-validations performed
objControl <- trainControl(method = 'cv', number = 3, returnResamp = 'none')

# run model; pass objControl so the 3-fold CV configured above is actually used
# (it was previously created but never handed to train)
objModel <- train(trainDF[, predictorsNames], trainDF[, outcomeName],
                  method = 'glmnet', metric = "RMSE", trControl = objControl)

# get predictions on your testing data
predictions <- predict(object = objModel, testDF[, predictorsNames])

library(pROC)
auc <- roc(testDF[, outcomeName], predictions)
print(auc$auc)

postResample(pred = predictions, obs = testDF[, outcomeName])

# find out variable importance
summary(objModel)
plot(varImp(objModel, scale = FALSE))

# find out model details
objModel

# display variable importance on a +/- scale
vimp <- varImp(objModel, scale = FALSE)
results <- data.frame(row.names(vimp$importance), vimp$importance$Overall)
colnames(results) <- c('VariableName', 'Weight')
# sort by weight and drop variables whose weight is exactly zero
results <- results[order(results$Weight), ]
results <- results[(results$Weight != 0), ]

par(mar = c(5, 15, 4, 2)) # increase y-axis margin.
# barplot returns the bar midpoints, used below to position the labels
xx <- barplot(results$Weight, width = 0.85,
              main = paste("Variable Importance -", outcomeName), horiz = TRUE,
              xlab = "< (-) importance > < neutral > < importance (+) >", axes = FALSE,
              col = ifelse((results$Weight > 0), 'blue', 'red'))
axis(2, at = xx, labels = results$VariableName, tick = FALSE, las = 2, line = -0.3, cex.axis = 0.6)

################################################
# advanced stuff
################################################

# boosted tree model (gbm): adjust learning rate and trees
# caret's gbm grid must name all four tuning parameters; n.minobsinnode was
# missing, so it is pinned here to 10 (gbm's documented default)
gbmGrid <- expand.grid(interaction.depth = c(1, 5, 9),
                       n.trees = 50,
                       shrinkage = 0.01,
                       n.minobsinnode = 10)

# run model
objModel <- train(trainDF[, predictorsNames], trainDF[, outcomeName],
                  method = 'gbm', trControl = objControl,
                  tuneGrid = gbmGrid, verbose = FALSE)

# get predictions on your testing data
predictions <- predict(object = objModel, testDF[, predictorsNames])

library(pROC)
auc <- roc(testDF[, outcomeName], predictions)
print(auc$auc)
声明声明:本网页内容为用户发布,旨在传播知识,不代表本网认同其观点,若有侵权等问题请及时与本网联系,我们将在第一时间删除处理。E-MAIL:11247931@qq.com
国外留学有用吗 花钱出国留学有用吗 !这叫什么号 百万医疗赔付后是否可以续保 前一年理赔过医疗险还能续保吗? 医疗住院险理赔后还能购买吗? 女生多大后可以不在长身高? 如何不用软件把手机投屏到电脑上手机屏幕怎样投放到电脑上 战时拒绝、故意延误军事订货罪既遂的处罚? 战时故意延误军事订货罪处罚标准 matlab非线性拟合求参数问题,急求解答。。。 请问以下代码怎么修改置信水平呢?它默认是0.95,我貌似改成0.99没有用 matlab高手帮帮忙 matlab编写方程 matlab编程问题,已知函数表达式和数据,求表达式中的系数 高手请指点,matlab问题。 知道函数的初始和目标状态,如何用matlab求解带未知数的系数函数表达式 matlab遇到的问题 matlab高手帮帮忙~~ 滴滴行程打不开怎么办 国有独资公司的董事长,副董事长,监事会*都是由国有资产监督管理机构制定的? 拟任市属国有企业正职领导职务什么级别 太原市国有资产经营公司李晓静 财政局有个什么国有资产管理或者投资公司,一般的说来,局长是董事长 快手主页怎么挂抖音号 我与爸爸妈妈妹妹度过了一个愉快的假期翻译英语? 写《愉快的假期》作文大全 美的电压力锅M丫一12CH603A电路图 吉利远景x6第一次保养要多久时间 不记得码,又把微信卸载了,怎么才能找回? 在上海生产内外墙腻子有哪些厂家 腻子粉什么品牌好 上海爱嘉腻子粉好不好 中国有多少美巢腻子粉厂 什么牌子外墙腻子没有纤维 立邦腻子有授权吗,真正立邦厂生产腻子粉吗 华润腻子粉厂址在哪里以及价格 松树夏天为什么会滴松脂? 斌盛腻子粉怎么样 腻子是什么 腻子粉什么牌子好 腻子粉编织袋生产厂家哪家好 兔耳风是什么中药 松树为什么会分泌松脂,松脂对松树有什么用?松树油有什么作用 兔耳风煮水加蜂蜜有什么效果? 松树往下淌油怎么回事 风柜斗草和红背兔耳风一起用吗? 青皮草的功效与作用 农村的光棍草是怎样一种植物?这种草都有哪些作用? 光棍草图片及功效 草药“一朵云”是什么药??