Quick start

After installing the Python library, use one of the following examples to get started:

CatBoostClassifier

import numpy as np
from catboost import CatBoostClassifier
# Build random integer features and 0/1 labels for the demo.
train_data = np.random.randint(0, 100, size=(100, 10))
train_label = np.random.randint(0, 2, size=100)
test_data = np.random.randint(0, 100, size=(50, 10))

# Gather the training parameters in one place, then build the classifier.
classifier_params = {
    'iterations': 2,
    'depth': 2,
    'learning_rate': 1,
    'loss_function': 'Logloss',
    'logging_level': 'Verbose',
}
model = CatBoostClassifier(**classifier_params)

# Train the model; columns 0, 2 and 5 are passed as categorical features.
categorical_columns = [0, 2, 5]
model.fit(train_data, train_label, cat_features=categorical_columns)

# Predict class labels and class probabilities for the test data.
preds_class = model.predict(test_data)
preds_proba = model.predict_proba(test_data)
for caption, values in (("class = ", preds_class), ("proba = ", preds_proba)):
    print(caption, values)

CatBoostRegressor

import numpy as np
from catboost import Pool, CatBoostRegressor
# Build random integer features and integer targets for the demo.
train_data = np.random.randint(0, 100, size=(100, 10))
train_label = np.random.randint(0, 1000, size=100)
test_data = np.random.randint(0, 100, size=(50, 10))

# Wrap the arrays in Pool objects; columns 0, 2 and 5 are passed as
# categorical features for both the train and the test pool.
categorical_columns = [0, 2, 5]
train_pool = Pool(train_data, train_label, cat_features=categorical_columns)
test_pool = Pool(test_data, cat_features=categorical_columns)

# Gather the training parameters in one place, then build the regressor.
regressor_params = {
    'iterations': 2,
    'depth': 2,
    'learning_rate': 1,
    'loss_function': 'RMSE',
}
model = CatBoostRegressor(**regressor_params)

# Train on the train pool.
model.fit(train_pool)

# Predict on the test pool with the trained model and show the result.
preds = model.predict(test_pool)
print(preds)

CatBoost

Datasets can be read from input files. For example, the Pool class offers this functionality.

from catboost import Pool, CatBoost
# Load the train and test datasets from files; both pools use the same
# column description file.
column_description_path = '../data/adult/train.cd'
train_pool = Pool('../data/adult/train_small', column_description=column_description_path)
test_pool = Pool('../data/adult/test_small', column_description=column_description_path)

# Pass the training parameters to the model as a single dict.
param = dict(iterations=2, depth=2, learning_rate=1, loss_function='Logloss')
model = CatBoost(param)

# Train on the train pool.
model.fit(train_pool)

# Request each prediction type from the same trained model, in order.
preds_class, preds_proba, preds_raw_vals = [
    model.predict(test_pool, prediction_type=prediction_type)
    for prediction_type in ('Class', 'Probability', 'RawFormulaVal')
]
print("Class", preds_class)
print("Proba", preds_proba)
print("Raw", preds_raw_vals)