# Source code for atm.constants

# -*- coding: utf-8 -*-

"""Constants module."""

from __future__ import absolute_import, unicode_literals

import logging
from builtins import object

from btb.selection import (
    UCB1, BestKReward, BestKVelocity, HierarchicalByAlgorithm, PureBestKVelocity, RecentKReward,
    RecentKVelocity)
from btb.selection import Uniform as UniformSelector
from btb.tuning import GP, GPEi, GPEiVelocity
from btb.tuning import Uniform as UniformTuner

# Project-wide constants, used mostly when handling configuration.
# TODO: convert these lists and classes to something more elegant, like enums

# Accepted values for enumerated settings and status fields.
SQL_DIALECTS = [
    'sqlite',
    'mysql',
]
SCORE_TARGETS = [
    'cv',
    'test',
    'mu_sigma',
]
BUDGET_TYPES = [
    'none',
    'classifier',
    'walltime',
]
DATARUN_STATUS = [
    'pending',
    'running',
    'complete',
]
CLASSIFIER_STATUS = [
    'running',
    'errored',
    'complete',
]
PARTITION_STATUS = [
    'incomplete',
    'errored',
    'gridding_done',
]

# Regex patterns anchored at the start of a string: an "s3://" prefix and an
# "http://" or "https://" prefix.
S3_PREFIX = r'^s3://'
HTTP_PREFIX = r'^https?://'

# strftime-style layout: year-month-day hour:minute.
TIME_FMT = '%Y-%m-%d %H:%M'

# "<something>.py:<identifier>" -- group 1 is the .py path, group 2 the name.
CUSTOM_CLASS_REGEX = r'(.*\.py):(\w+)$'
# Any string ending in ".json", captured whole as group 1.
JSON_REGEX = r'(.*\.json)$'

# NOTE(review): presumably the default number of cross-validation folds -- confirm.
N_FOLDS_DEFAULT = 10

# Log-level names mapped to the numeric levels of the standard ``logging``
# module. The first five keys are the names of the ``logging`` attributes
# themselves, so they are looked up by name.
LOG_LEVELS = {
    level_name: getattr(logging, level_name)
    for level_name in ('CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG')
}
# 'NONE' has no same-named attribute in ``logging``; it maps to NOTSET.
LOG_LEVELS['NONE'] = logging.NOTSET

# Short name -> tuner class imported from ``btb.tuning``.
# NOTE(review): the keys are presumably the tuner names accepted in
# configuration -- confirm against the callers.
TUNERS = dict([
    ('uniform', UniformTuner),
    ('gp', GP),
    ('gp_ei', GPEi),
    ('gp_eivel', GPEiVelocity),
])

# Short name -> selector class imported from ``btb.selection``.
# NOTE(review): the keys are presumably the selector names accepted in
# configuration -- confirm against the callers.
SELECTORS = dict([
    ('uniform', UniformSelector),
    ('ucb1', UCB1),
    ('bestk', BestKReward),
    ('bestkvel', BestKVelocity),
    ('purebestkvel', PureBestKVelocity),
    ('recentk', RecentKReward),
    ('recentkvel', RecentKVelocity),
    ('hieralg', HierarchicalByAlgorithm),
])

# Method abbreviation -> name of the JSON file associated with that method.
# NOTE(review): the directory these file names are resolved against is not
# visible in this module -- check the code that consumes METHODS.
METHODS = dict([
    ('logreg', 'logistic_regression.json'),
    ('svm', 'support_vector_machine.json'),
    ('sgd', 'stochastic_gradient_descent.json'),
    ('dt', 'decision_tree.json'),
    ('et', 'extra_trees.json'),
    ('rf', 'random_forest.json'),
    ('gnb', 'gaussian_naive_bayes.json'),
    ('mnb', 'multinomial_naive_bayes.json'),
    ('bnb', 'bernoulli_naive_bayes.json'),
    ('gp', 'gaussian_process.json'),
    ('pa', 'passive_aggressive.json'),
    ('knn', 'k_nearest_neighbors.json'),
    ('mlp', 'multi_layer_perceptron.json'),
    ('ada', 'adaboost.json'),
])


class ClassifierStatus(object):
    """Namespace of string constants for classifier status values.

    The values are the same strings listed in ``CLASSIFIER_STATUS``.
    """

    RUNNING = 'running'
    ERRORED = 'errored'
    COMPLETE = 'complete'


class RunStatus(object):
    """Namespace of string constants for run status values.

    The values are the same strings listed in ``DATARUN_STATUS``.
    """

    PENDING = 'pending'
    RUNNING = 'running'
    COMPLETE = 'complete'


class PartitionStatus(object):
    """Namespace of string constants for partition status values.

    The values are the same strings listed in ``PARTITION_STATUS``.
    """

    INCOMPLETE = 'incomplete'
    GRIDDING_DONE = 'gridding_done'
    ERRORED = 'errored'


class FileType(object):
    """Namespace of string constants naming file types.

    NOTE(review): presumably these label where a file lives (local disk, S3,
    or an HTTP(S) URL) -- confirm against the code that uses them.
    """

    LOCAL = 'local'
    S3 = 's3'
    HTTP = 'http'


# these are the strings that are used to index into results dictionaries
class Metrics(object):
    """Namespace of string constants naming metrics.

    Each attribute holds the lower-case string form of its own name.
    """

    ACCURACY = 'accuracy'
    RANK_ACCURACY = 'rank_accuracy'
    COHEN_KAPPA = 'cohen_kappa'
    F1 = 'f1'
    F1_MICRO = 'f1_micro'
    F1_MACRO = 'f1_macro'
    ROC_AUC = 'roc_auc'     # receiver operating characteristic
    ROC_AUC_MICRO = 'roc_auc_micro'
    ROC_AUC_MACRO = 'roc_auc_macro'
    AP = 'ap'               # average precision
    MCC = 'mcc'             # matthews correlation coefficient
    PR_CURVE = 'pr_curve'
    ROC_CURVE = 'roc_curve'


# Subset of the ``Metrics`` names grouped under the "binary" label.
METRICS_BINARY = [
    Metrics.ACCURACY,
    Metrics.COHEN_KAPPA,
    Metrics.F1,
    Metrics.ROC_AUC,
    Metrics.AP,
    Metrics.MCC,
]

# Subset of the ``Metrics`` names grouped under the "multiclass" label.
METRICS_MULTICLASS = [
    Metrics.ACCURACY,
    Metrics.RANK_ACCURACY,
    Metrics.COHEN_KAPPA,
    Metrics.F1_MICRO,
    Metrics.F1_MACRO,
    Metrics.ROC_AUC_MICRO,
    Metrics.ROC_AUC_MACRO,
]

# Union of both lists with duplicates removed. It goes through a set, so the
# order of the resulting list is unspecified.
METRICS = list(set(METRICS_BINARY + METRICS_MULTICLASS))