from __future__ import absolute_import, unicode_literals
import json
import os
from builtins import object, range
from builtins import str as newstr
import btb
from atm.constants import METHODS
[docs]class HyperParameter(object):
@property
def is_categorical(self):
return False
@property
def is_constant(self):
return False
[docs]class Numeric(HyperParameter):
def __init__(self, name, type, range):
self.name = name
self.type = type
self.range = range
@property
def is_constant(self):
return len(self.range) == 1
[docs] def as_tunable(self):
return btb.HyperParameter(param_type=self.type, param_range=self.range)
[docs]class Categorical(HyperParameter):
def __init__(self, name, type, values):
self.name = name
self.type = type
for i, val in enumerate(values):
if val is None:
# the value None is allowed for every parameter type
continue
if self.type == 'int_cat':
values[i] = int(val)
elif self.type == 'float_cat':
values[i] = float(val)
elif self.type == 'string':
# this is necessary to avoid a bug in sklearn, which won't be
# fixed until 0.20
values[i] = str(newstr(val))
elif self.type == 'bool':
values[i] = bool(val)
self.values = values
@property
def is_categorical(self):
return True
@property
def is_constant(self):
return len(self.values) == 1
[docs] def as_tunable(self):
return btb.HyperParameter(param_type=self.type, param_range=self.values)
[docs]class List(HyperParameter):
def __init__(self, name, type, list_length, element):
self.name = name
self.length = Categorical('len(%s)' % self.name, 'int_cat', list_length)
element_type = HYPERPARAMETER_TYPES[element['type']]
self.element = element_type('element', **element)
@property
def is_categorical(self):
return True
[docs] def get_elements(self):
elements = []
for i in range(max(self.length.values)):
# generate names for the pseudo-hyperparameters in the list
elt_name = '%s[%d]' % (self.name, i)
elements.append(elt_name)
conditions = {str(i): elements[:i] for i in self.length.values}
return elements, conditions
[docs]class HyperPartition(object):
"""
Class which holds the hyperparameter settings that define a hyperpartition.
"""
def __init__(self, categoricals, constants, tunables):
"""
categoricals: the values for this hyperpartition which have been fixed
and define the hyperpartition. List of tuples of the form ('param', val).
constants: the values for this hyperpartition for which there was no
choice. List of tuples of the form ('param', val).
tunables: the free variables which must be tuned. List of tuples of the
form ('param', HyperParameter).
"""
self.categoricals = categoricals
self.constants = constants
self.tunables = tunables
def __repr__(self):
cats, cons, tuns = [None] * 3
if self.categoricals:
cats = '[%s]' % ', '.join(['%s=%s' % c for c in self.categoricals])
if self.constants:
cons = '[%s]' % ', '.join(['%s=%s' % c for c in self.constants])
if self.tunables:
tuns = '[%s]' % ', '.join(['%s' % t for t, _ in self.tunables])
return ('<HyperPartition: categoricals: %s; constants: %s; tunables: %s>'
% (cats, cons, tuns))
HYPERPARAMETER_TYPES = {
'int': Numeric,
'int_exp': Numeric,
'float': Numeric,
'float_exp': Numeric,
'int_cat': Categorical,
'float_cat': Categorical,
'string': Categorical,
'bool': Categorical,
'list': List,
}
[docs]class Method(object):
"""
This class is initialized with the name of a json configuration file.
The config contains information about a classification method and the
hyperparameter arguments it needs to run. Its main purpose is to generate
hyperpartitions (possible combinations of categorical hyperparameters).
"""
def __init__(self, method):
"""
method: method code or path to JSON file containing all the information
needed to specify this enumerator.
"""
if method in METHODS:
# if the configured method is a code, look up the path to its json
config_path = os.path.join(os.path.dirname(__file__), 'methods', METHODS[method])
else:
# otherwise, it must be a path to a file
config_path = method
with open(config_path) as f:
config = json.load(f)
self.name = config['name']
self.root_params = config['root_hyperparameters']
self.conditions = config['conditional_hyperparameters']
self.class_path = config['class']
# create hyperparameters from the parameter config
self.parameters = {}
for k, v in list(config['hyperparameters'].items()):
param_type = HYPERPARAMETER_TYPES[v['type']]
self.parameters[k] = param_type(name=k, **v)
# List hyperparameters are special. These are replaced in the
# CPT with a size hyperparameter and sets of element hyperparameters
# conditioned on the size.
for name, param in list(self.parameters.items()):
if isinstance(param, List):
elements, conditions = param.get_elements()
for e in elements:
self.parameters[e] = param.element
# add the size parameter, remove the list parameter
self.parameters[param.length.name] = param.length
del self.parameters[param.name]
# if this is a root param, replace its name with the new size
# name in the root params list
if param.name in self.root_params:
self.root_params.append(param.length.name)
self.root_params.remove(param.name)
# if this is a conditional param, replace it there instead
for var, cond in list(self.conditions.items()):
for val, deps in list(cond.items()):
if param.name in deps:
deps.append(param.length.name)
deps.remove(param.name)
self.conditions[var][val] = deps
# finally, add all the potential sets of list elements as
# conditions of the list's size
self.conditions[param.length.name] = conditions
def _sort_parameters(self, params):
"""
Sort a list of HyperParameter objects into lists of constants,
categoricals, and tunables.
"""
constants = []
categoricals = []
tunables = []
for p in params:
param = self.parameters[p]
if param.is_constant:
if param.is_categorical:
constants.append((p, param.values[0]))
else:
constants.append((p, param.range[0]))
elif param.is_categorical:
categoricals.append(p)
else:
tunables.append((p, param.as_tunable()))
return constants, categoricals, tunables
def _enumerate(self, fixed_cats, constants, free_cats, tunables):
"""
Some things are fixed. Make a choice from the things that aren't fixed
and see where that leaves us. Recurse.
fixed_cats: a list of (name, value) tuples of qualified categorical
variables
constants: a list of (name, value) tuples of fixed constants
free_cats: a list of names of free categorical variables
tunables: a list of names of free tunable parameters
Returns: a list of HyperPartition objects
"""
# if there are no more free variables, we have a new HyperPartition. We've
# reached the bottom of the recursion, so return.
if not free_cats:
return [HyperPartition(fixed_cats, constants, tunables)]
parts = []
# fix a single categorical parameter, removing it from the list of free
# variables, and see where that takes us
cat = free_cats.pop(0)
for val in self.parameters[cat].values:
# add this value to the list of qualified categoricals
new_fixed_cats = fixed_cats + [(cat, val)]
# these lists are copied for now
new_constants = constants[:]
new_free_cats = free_cats[:]
new_tunables = tunables[:]
# check if choosing this value opens up new parts of the conditional
# parameter tree.
# we need to check conditions for str(val) because all keys in json
# must be strings.
if cat in self.conditions and str(val) in self.conditions[cat]:
# categorize the conditional variables which are now in play
new_params = self.conditions[cat][str(val)]
cons, cats, tuns = self._sort_parameters(new_params)
new_constants = constants + cons
new_free_cats = free_cats + cats
new_tunables = tunables + tuns
# recurse with the newly qualified categorical as a constant
parts.extend(self._enumerate(fixed_cats=new_fixed_cats,
constants=new_constants,
free_cats=new_free_cats,
tunables=new_tunables))
return parts
[docs] def get_hyperpartitions(self):
"""
Traverse the CPT and enumerate all possible hyperpartitions of
categorical parameters for this method.
"""
return self._enumerate([], *self._sort_parameters(self.root_params))