Quick start

Use one of the following examples after installing the Python package to get started:

CatBoostClassifier

import numpy as np

from catboost import CatBoostClassifier, Pool

# initialize data: random integer features and binary labels for training,
# plus a separate feature matrix to predict on
train_data = np.random.randint(0, 100, size=(100, 10))
train_labels = np.random.randint(0, 2, size=(100))
test_data_1 = np.random.randint(0, 100, size=(50, 10))

# wrap the test features in a Pool so that the predictions below are made on
# the test set rather than on the data the model was trained on
test_data = Pool(test_data_1)

model = CatBoostClassifier(iterations=2,
                           depth=2,
                           learning_rate=1,
                           loss_function='Logloss',
                           logging_level='Verbose')
# train the model
model.fit(train_data, train_labels)
# make the prediction using the resulting model
preds_class = model.predict(test_data)
preds_proba = model.predict_proba(test_data)
print("class = ", preds_class)
print("proba = ", preds_proba)

CatBoostRegressor

import numpy as np
from catboost import Pool, CatBoostRegressor
# generate random integer features and targets for training,
# and random integer features for testing
train_data = np.random.randint(0, 100, size=(100, 10))
train_label = np.random.randint(0, 1000, size=100)
test_data = np.random.randint(0, 100, size=(50, 10))

# wrap the arrays in Pool objects; columns 0, 2 and 5 are passed as categorical
cat_feature_indices = [0, 2, 5]
train_pool = Pool(train_data, train_label, cat_features=cat_feature_indices)
test_pool = Pool(test_data, cat_features=cat_feature_indices)

# collect the training parameters and build the regressor from them
training_params = dict(iterations=2, depth=2, learning_rate=1, loss_function='RMSE')
model = CatBoostRegressor(**training_params)
# fit the model on the training pool
model.fit(train_pool)
# predict for the test pool and print the result
preds = model.predict(test_pool)
print(preds)

CatBoost

Datasets can be read from input files. For example, the Pool class can load a dataset directly from a file path together with a column description file.

from catboost import Pool, CatBoost
# load the train and test datasets from files; both use the same column description file
column_description_path = '../data/adult/train.cd'
train_pool = Pool('../data/adult/train_small',
                  column_description=column_description_path)
test_pool = Pool('../data/adult/test_small',
                 column_description=column_description_path)
# gather the training parameters in a dictionary
param = dict(iterations=2, depth=2, learning_rate=1, loss_function='Logloss')
model = CatBoost(param)
# fit the model on the training pool
model.fit(train_pool)
# ask the fitted model for three prediction types on the test pool
preds_class = model.predict(test_pool,
                            prediction_type='Class')
preds_proba = model.predict(test_pool,
                            prediction_type='Probability')
preds_raw_vals = model.predict(test_pool,
                               prediction_type='RawFormulaVal')
print("Class", preds_class)
print("Proba", preds_proba)
print("Raw", preds_raw_vals)