from __future__ import absolute_import, unicode_literals
import base64
import hashlib
import json
import logging
import os
import pickle
from builtins import str
import numpy as np
from atm.compat import getargs
logger = logging.getLogger('atm')
def hash_dict(dictionary, ignored_keys=None):
    """Hash a python dictionary to a hexadecimal string.

    The digest is the MD5 of the ``repr`` of the dictionary's sorted items,
    so two dictionaries with equal contents produce the same hash regardless
    of insertion order.

    http://stackoverflow.com/questions/5884066/hashing-a-python-dictionary

    Args:
        dictionary (dict):
            mapping to hash. It is copied first and never modified.
        ignored_keys (iterable or None):
            keys to leave out of the hash. Keys that are not present in
            ``dictionary`` are skipped silently.

    Returns:
        str: hexadecimal MD5 digest.
    """
    remaining = dict(dictionary)  # shallow copy: the caller's dict stays intact

    for key in (ignored_keys or []):
        # pop() with a default tolerates ignored keys that are absent,
        # whereas ``del`` would raise KeyError.
        remaining.pop(key, None)

    return hashlib.md5(repr(sorted(remaining.items())).encode('utf8')).hexdigest()
def hash_nested_tuple(tup):
    """Hash a nested tuple to a hexadecimal string.

    The elements of ``tup`` are sorted before hashing, so the order of the
    top-level elements does not affect the result.

    Args:
        tup (iterable):
            collection of sortable elements.

    Returns:
        str: hexadecimal MD5 digest of the ``repr`` of the sorted elements.
    """
    ordered = sorted(tup)
    payload = repr(ordered).encode('utf8')
    return hashlib.md5(payload).hexdigest()
def hash_string(s):
    """Hash a string to a hexadecimal string.

    Args:
        s:
            value to hash. It is converted with ``str`` first, so
            non-string values are hashed by their string form.

    Returns:
        str: hexadecimal MD5 digest of the UTF-8 encoded text.
    """
    text = str(s)
    encoded = text.encode('utf8')
    return hashlib.md5(encoded).hexdigest()
def ensure_directory(directory):
    """Create directory if it doesn't exist.

    Missing intermediate directories are created as well. Creation is
    attempted directly instead of checking for existence first, so that
    another process creating the same directory at the same moment does not
    make this call fail.

    Args:
        directory (str):
            path of the directory to create.

    Raises:
        OSError: if the directory cannot be created and the path still does
            not exist afterwards (e.g. permission denied, invalid path).
    """
    try:
        os.makedirs(directory)
    except OSError:
        # An already-existing path is fine (this is also what happens when
        # we lose a creation race); anything else is a genuine failure.
        if not os.path.exists(directory):
            raise
def object_to_base_64(obj):
    """Pickle an object and base64-encode the result.

    Args:
        obj:
            any picklable object.

    Returns:
        base64-encoded pickle of ``obj``, as returned by
        ``base64.b64encode``.
    """
    return base64.b64encode(pickle.dumps(obj))
def base_64_to_object(b64str):
    """Inverse of object_to_base_64.

    Decode a base64-encoded string and then unpickle it.

    NOTE(review): unpickling can execute arbitrary code; this presumably
    only ever receives data produced by ``object_to_base_64`` -- confirm
    against callers.

    Args:
        b64str:
            base64-encoded pickle.

    Returns:
        the unpickled object.
    """
    return pickle.loads(base64.b64decode(b64str))
def obj_has_method(obj, method):
    """Tell whether ``obj`` has a callable attribute named ``method``.

    http://stackoverflow.com/questions/34439/finding-what-methods-an-object-has

    Args:
        obj:
            object to inspect.
        method (str):
            attribute name to look for.

    Returns:
        bool: True if the attribute exists and is callable, False otherwise.
    """
    if not hasattr(obj, method):
        return False

    return callable(getattr(obj, method))
# Converting hyperparameters to and from BTB-compatible formats
def update_params(params, categoricals, constants):
    """Update params with categoricals and constants for the fitting process.

    ``params`` is modified in place and also returned. Categoricals are
    applied first and constants second, so a constant overrides a
    categorical with the same key.

    Examples of the format for SVM sigmoid hyperpartition:

        categoricals = (('kernel', 'poly'),
                        ('probability', True),
                        ('_scale', True))
        constants = [('cache_size', 15000)]

    Args:
        params (dict):
            params proposed by the tuner.
        categoricals (iterable or None):
            ``(key, value)`` pairs of categorical hyperparameters.
        constants (iterable or None):
            ``(key, value)`` pairs of constant hyperparameters.

    Returns:
        dict: the same ``params`` object, updated.
    """
    # Walk each collection separately instead of concatenating them:
    # ``tuple + list`` (the very shape shown in the example above) raises
    # TypeError.
    for pairs in (categoricals, constants):
        for key, value in (pairs or ()):
            params[key] = value

    return params
def get_instance(class_, **kwargs):
    """Create an instance of the given class with required kwargs.

    Only the keyword arguments whose names are reported by ``getargs`` for
    ``class_.__init__`` are passed to the constructor; every other entry of
    ``kwargs`` is ignored.

    Args:
        class_ (type):
            class to instantiate
        **kwargs:
            keyword arguments

    Returns:
        instance of ``class_`` built with the accepted subset of ``kwargs``.
    """
    accepted = getargs(class_.__init__)

    selected = {}
    for name, value in kwargs.items():
        if name in accepted:
            selected[name] = value

    return class_(**selected)
def params_to_vectors(params, tunables):
    """Convert hyperparameter dicts into a numpy array ready for BTB tuning.

    Args:
        params: list of hyperparameter vectors. Each vector is a dict mapping
            the names of parameters to those parameters' values. Anything
            that is not a list or a numpy array is treated as a single
            vector.
        tunables: list of HyperParameter metadata structures describing all
            the optimizable hyperparameters that should be in each vector.
            Only the first element (the name) of each entry is used. e.g.

            tunables = [('C', HyperParameter(type='float_exp', range=(1e-5, 1e5))),
                        ('degree', HyperParameter('int', (2, 4))),
                        ('gamma', HyperParameter('float_exp', (1e-05, 1e5)))]

    Returns:
        vectors: np.array of parameter vectors ready to be optimized by a
            Gaussian Process (or what have you).
            vectors.shape = (len(params), len(tunables))
    """
    # a lone vector is wrapped so that it can be handled like a list of one
    rows = params if isinstance(params, (list, np.ndarray)) else [params]

    # column order follows the order of the tunables
    names = [tunable[0] for tunable in tunables]

    matrix = np.zeros((len(rows), len(names)))
    for row_idx, row in enumerate(rows):
        for col_idx, name in enumerate(names):
            matrix[row_idx, col_idx] = row[name]

    return matrix
# Serializing and deserializing data on disk
def make_save_path(dir, classifier, suffix):
    """Generate the save path for one of a classifier's files.

    The file name has the form ``<dataset name>-<hash>.<suffix>``, where the
    dataset name is stripped of every character that is not alphanumeric, a
    space, a dash or an underscore (and of trailing whitespace), and the hash
    is the first 8 characters of the hash of the classifier's hyperparameter
    values.

    Args:
        dir (str):
            directory in which the file lives.
        classifier:
            object exposing ``datarun.dataset.name`` and
            ``hyperparameter_values``.
        suffix (str):
            file extension, without the leading dot.

    Returns:
        str: ``dir`` joined with the generated file name.
    """
    allowed_punctuation = (' ', '-', '_')
    dataset_name = classifier.datarun.dataset.name

    kept_chars = [
        char for char in dataset_name
        if char.isalnum() or char in allowed_punctuation
    ]
    run_name = "".join(kept_chars).rstrip()

    digest = hash_dict(classifier.hyperparameter_values)

    return os.path.join(dir, "%s-%s.%s" % (run_name, digest[:8], suffix))
def save_model(classifier, models_dir, model):
    """Save a pickled version of a Model object for a particular classifier.

    The destination is generated from the classifier's attributes by
    ``make_save_path`` with the ``model`` suffix.

    Args:
        classifier:
            classifier the model belongs to.
        models_dir (str):
            directory in which the model file is written.
        model:
            object to pickle, using the highest pickle protocol available.

    Returns:
        str: path of the written file.
    """
    model_path = make_save_path(models_dir, classifier, 'model')
    logger.info('Saving model in: %s' % model_path)

    with open(model_path, 'wb') as model_file:
        pickle.dump(model, model_file, protocol=pickle.HIGHEST_PROTOCOL)

    return model_path
def save_metrics(classifier, metrics_dir, metrics):
    """Save a JSON-serialized set of performance metrics for a classifier.

    The destination is generated from the classifier's attributes by
    ``make_save_path`` with the ``metric`` suffix.

    Args:
        classifier:
            classifier the metrics belong to.
        metrics_dir (str):
            directory in which the metrics file is written.
        metrics:
            JSON-serializable object holding the metrics.

    Returns:
        str: path of the written file.
    """
    metrics_path = make_save_path(metrics_dir, classifier, 'metric')
    logger.info('Saving metrics in: %s' % metrics_path)

    with open(metrics_path, 'w') as metrics_file:
        json.dump(metrics, metrics_file)

    return metrics_path
def load_model(classifier, models_dir):
    """Load the Model object for a particular classifier.

    The file is looked up at the path that ``make_save_path`` generates for
    the ``model`` suffix and is read with ``pickle``.

    NOTE(review): unpickling can execute arbitrary code; ``models_dir`` is
    presumably a trusted location -- confirm.

    Args:
        classifier:
            classifier whose model is wanted.
        models_dir (str):
            directory holding the model files.

    Returns:
        the unpickled object.
    """
    model_path = make_save_path(models_dir, classifier, 'model')

    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    return model
def load_metrics(classifier, metrics_dir):
    """Load the performance metrics for a particular classifier.

    The file is looked up at the path that ``make_save_path`` generates for
    the ``metric`` suffix and is parsed as JSON.

    Args:
        classifier:
            classifier whose metrics are wanted.
        metrics_dir (str):
            directory holding the metrics files.

    Returns:
        the object decoded from the JSON file.
    """
    metrics_path = make_save_path(metrics_dir, classifier, 'metric')

    with open(metrics_path) as metrics_file:
        metrics = json.load(metrics_file)

    return metrics