public class GBTClassifier extends ProbabilisticClassifier<Vector,GBTClassifier,GBTClassificationModel> implements GBTClassifierParams, DefaultParamsWritable, Logging
The implementation is based upon: J.H. Friedman. "Stochastic Gradient Boosting." 1999.
Notes on Gradient Boosting vs. TreeBoost:
- This implementation is for Stochastic Gradient Boosting, not for TreeBoost.
- Both algorithms learn tree ensembles by minimizing loss functions.
- TreeBoost (Friedman, 1999) additionally modifies the outputs at tree leaf nodes based on the loss function, whereas the original gradient boosting method does not.
- We expect to implement TreeBoost in the future: https://issues.apache.org/jira/browse/SPARK-4240
Constructor and Description |
---|
GBTClassifier() |
GBTClassifier(String uid) |
Modifier and Type | Method and Description |
---|---|
GBTClassifier | copy(ParamMap extra)<br>Creates a copy of this instance with the same UID and some extra params. |
static GBTClassifier | load(String path) |
static MLReader<T> | read() |
GBTClassifier | setCacheNodeIds(boolean value) |
GBTClassifier | setCheckpointInterval(int value)<br>Specifies how often to checkpoint the cached node IDs. |
GBTClassifier | setFeatureSubsetStrategy(String value) |
GBTClassifier | setImpurity(String value)<br>The impurity setting is ignored for GBT models. |
GBTClassifier | setLossType(String value) |
GBTClassifier | setMaxBins(int value) |
GBTClassifier | setMaxDepth(int value) |
GBTClassifier | setMaxIter(int value) |
GBTClassifier | setMaxMemoryInMB(int value) |
GBTClassifier | setMinInfoGain(double value) |
GBTClassifier | setMinInstancesPerNode(int value) |
GBTClassifier | setSeed(long value) |
GBTClassifier | setStepSize(double value) |
GBTClassifier | setSubsamplingRate(double value) |
GBTClassifier | setValidationIndicatorCol(String value) |
static String[] | supportedLossTypes()<br>Accessor for supported loss settings: logistic |
String | uid()<br>An immutable unique ID for the object and its derivatives. |
setProbabilityCol, setThresholds
setRawPredictionCol
fit, setFeaturesCol, setLabelCol, setPredictionCol, transformSchema
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getLossType, getOldLossType, lossType
getOldBoostingStrategy, getValidationTol, stepSize, validationTol
featureSubsetStrategy, getFeatureSubsetStrategy, getOldStrategy, getSubsamplingRate, subsamplingRate
cacheNodeIds, getCacheNodeIds, getMaxBins, getMaxDepth, getMaxMemoryInMB, getMinInfoGain, getMinInstancesPerNode, getOldStrategy, maxBins, maxDepth, maxMemoryInMB, minInfoGain, minInstancesPerNode
validateAndTransformSchema
getLabelCol, labelCol
featuresCol, getFeaturesCol
getPredictionCol, predictionCol
clear, copyValues, defaultCopy, defaultParamMap, explainParam, explainParams, extractParamMap, extractParamMap, get, getDefault, getOrDefault, getParam, hasDefault, hasParam, isDefined, isSet, paramMap, params, set, set, set, setDefault, setDefault, shouldOwn
toString
checkpointInterval, getCheckpointInterval
getMaxIter, maxIter
getStepSize
getValidationIndicatorCol, validationIndicatorCol
getImpurity, getOldImpurity, impurity
write
save
initializeLogging, initializeLogIfNecessary, initializeLogIfNecessary, isTraceEnabled, log_, log, logDebug, logDebug, logError, logError, logInfo, logInfo, logName, logTrace, logTrace, logWarning, logWarning
validateAndTransformSchema
getRawPredictionCol, rawPredictionCol
getProbabilityCol, probabilityCol
getThresholds, thresholds
public GBTClassifier(String uid)
public GBTClassifier()
public static final String[] supportedLossTypes()
public static GBTClassifier load(String path)
public static MLReader<T> read()
public String uid()
Description copied from interface: Identifiable
An immutable unique ID for the object and its derivatives.
uid
in interface Identifiable
public GBTClassifier setMaxDepth(int value)
setMaxDepth
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setMaxBins(int value)
setMaxBins
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setMinInstancesPerNode(int value)
setMinInstancesPerNode
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setMinInfoGain(double value)
setMinInfoGain
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setMaxMemoryInMB(int value)
setMaxMemoryInMB
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setCacheNodeIds(boolean value)
setCacheNodeIds
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setCheckpointInterval(int value)
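Specifies how often to checkpoint the cached node IDs. E.g. 10 means that the cache will get checkpointed every 10 iterations. This is only used if cacheNodeIds is true and if the checkpoint directory is set in SparkContext. Must be at least 1. (default = 10)
setCheckpointInterval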
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setImpurity(String value)
setImpurity
in interface TreeClassifierParams
value - (undocumented)
public GBTClassifier setSubsamplingRate(double value)
setSubsamplingRate
in interface TreeEnsembleParams
value - (undocumented)
public GBTClassifier setSeed(long value)
setSeed
in interface DecisionTreeParams
value - (undocumented)
public GBTClassifier setMaxIter(int value)
setMaxIter
in interface GBTParams
value - (undocumented)
public GBTClassifier setStepSize(double value)
setStepSize
in interface GBTParams
value - (undocumented)
public GBTClassifier setFeatureSubsetStrategy(String value)
setFeatureSubsetStrategy
in interface TreeEnsembleParams
value - (undocumented)
public GBTClassifier setLossType(String value)
public GBTClassifier setValidationIndicatorCol(String value)
public GBTClassifier copy(ParamMap extra)
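Description copied from interface: Params
Creates a copy of this instance with the same UID and some extra params. Subclasses should implement this method and set the return type properly. See defaultCopy().
copy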
in interface Params
copy
in class Predictor<Vector,GBTClassifier,GBTClassificationModel>
extra - (undocumented)