try:
    from collections.abc import MutableSequence
except ImportError:  # interpreters that predate collections.abc
    from collections import MutableSequence

import featurehub.modeling.model
class Metric(object):
    """A single named evaluation metric.

    Holds three plain attributes: ``name``, ``scoring`` and ``value``.
    Equality compares all three fields, while ordering (``<`` / ``>``)
    compares ``name`` only. Because ``__eq__`` is defined without
    ``__hash__``, instances are unhashable.
    """

    def __init__(self, name, scoring, value):
        self.name = name
        self.scoring = scoring
        self.value = value

    def __eq__(self, other):
        """Return True only for another Metric with identical fields."""
        if not isinstance(other, Metric):
            return False
        return (
            self.name == other.name
            and self.scoring == other.scoring
            and self.value == other.value
        )

    def __str__(self):
        return "<{} object with fields {}>".format(type(self), self.__dict__)

    def __repr__(self):
        return str(self)

    def __lt__(self, other):
        """Order metrics by name; defer to Python for non-Metric operands."""
        if not isinstance(other, Metric):
            # Lets Python try the reflected operation and otherwise raise a
            # standard TypeError instead of an AttributeError on ``.name``.
            return NotImplemented
        return self.name < other.name

    def __gt__(self, other):
        """Order metrics by name; defer to Python for non-Metric operands."""
        if not isinstance(other, Metric):
            return NotImplemented
        return self.name > other.name

    def convert(self, kind="user"):
        """Convert to nice format for returning to user or inserting into db.

        Conversion to user format returns a dictionary with one element
        mapping metric name to metric value. Conversion to db format returns
        a dictionary with keys "name", "scoring", and "value" mapping to
        their respective values. In both formats the value is coerced with
        ``float()`` (so np.floating values become Python floats); a value
        that cannot be coerced is reported as ``None``.

        Parameters
        ----------
        kind : str
            One of "user" or "db"

        Returns
        -------
        dict

        Raises
        ------
        ValueError
            If ``kind`` is neither "user" nor "db".
        """
        # Deliberately best-effort: any failure to coerce yields None rather
        # than propagating, matching the documented contract above.
        try:
            value = float(self.value)
        except Exception:
            value = None

        if kind == "user":
            return {self.name: value}
        if kind == "db":
            return {
                "name": self.name,
                "scoring": self.scoring,
                "value": value,
            }
        raise ValueError("Bad kind: {} ".format(kind))

    def to_string(self, kind="user"):
        """Return a ``"name: value"`` string for the user format.

        The raw ``value`` attribute is formatted as-is (no float coercion).

        Parameters
        ----------
        kind : str
            Only "user" is supported.

        Raises
        ------
        NotImplementedError
            If ``kind`` is anything other than "user".
        """
        if kind == "user":
            return "{}: {}".format(self.name, self.value)
        raise NotImplementedError

    @classmethod
    def from_dict(cls, d, kind="user"):
        """Instantiate Metric from user/db format.

        Parameters
        ----------
        d : dict
            For "user": a single-item mapping of metric name to value; the
            scoring is looked up with :meth:`name_to_scoring`. For "db": a
            mapping whose items are passed as keyword arguments to the
            constructor.
        kind : str
            One of "user" or "db"

        Raises
        ------
        ValueError
            If ``kind`` is unknown, or if a "user" dict does not contain
            exactly one item.
        """
        if kind == "user":
            # Explicit check instead of ``assert`` so that validation still
            # happens when Python runs with -O.
            if len(d) != 1:
                raise ValueError(
                    "Expected exactly one metric, got {}".format(len(d))
                )
            name, value = next(iter(d.items()))
            return cls(name, cls.name_to_scoring(name), value)
        if kind == "db":
            return cls(**d)
        raise ValueError("Bad kind: {} ".format(kind))

    @staticmethod
    def name_to_scoring(name):
        """Find the scoring type associated with the metric name.

        Searches ``Model.CLASSIFICATION_SCORING`` first, then
        ``Model.REGRESSION_SCORING``. Within each list only the first entry
        whose "name" matches is considered. Returns ``None`` when no list
        yields a non-None scoring.
        """
        model_cls = featurehub.modeling.model.Model
        scoring_tables = (
            model_cls.CLASSIFICATION_SCORING,
            model_cls.REGRESSION_SCORING,
        )
        for table in scoring_tables:
            for entry in table:
                if entry["name"] == name:
                    if entry["scoring"] is not None:
                        return entry["scoring"]
                    # First match had no scoring: fall through to next table.
                    break
        return None
class MetricList(MutableSequence):
    """Mutable sequence of metric objects backed by a plain list.

    Equality is order-insensitive: two lists are equal when they have the
    same length and their sorted elements compare equal pairwise.
    """

    def __init__(self, data=None):
        """Create a list, optionally copying items from the iterable ``data``."""
        super().__init__()
        self._list = list(data) if data is not None else []

    def __repr__(self):
        return repr(self._list)

    def __len__(self):
        return len(self._list)

    def __getitem__(self, ii):
        return self._list[ii]

    def __delitem__(self, ii):
        del self._list[ii]

    def __setitem__(self, ii, val):
        self._list[ii] = val

    def __eq__(self, other):
        """Compare with another MetricList, ignoring element order."""
        if not isinstance(other, MetricList):
            return False
        if len(self._list) != len(other._list):
            return False
        # Sort the lists based on Metric.name. This is because if we convert
        # the MetricList to a dictionary for returning to the user, the keys
        # are not in any sorted order.
        # TODO The interface of this collection should be a set.
        pairs = zip(sorted(self._list), sorted(other._list))
        return not any(x != y for x, y in pairs)

    def insert(self, ii, val):
        """Insert ``val`` before index ``ii``."""
        self._list.insert(ii, val)

    def append(self, val):
        """Append ``val`` to the end of the list."""
        self._list.append(val)

    def to_string(self, kind="user"):
        """Get user-readable output.

        Returns a header line followed by one line per metric, each produced
        by that metric's ``to_string(kind=kind)``; an empty list yields a
        single placeholder line instead.
        """
        header = "Feature evaluation metrics: \n"
        line_prefix = " "
        line_suffix = "\n"
        if self._list:
            body = "".join(
                line_prefix + metric.to_string(kind=kind) + line_suffix
                for metric in self._list
            )
        else:
            body = line_prefix + "<no metrics returned>" + line_suffix
        return header + body

    def convert(self, kind="user"):
        """Convert underlying metric objects.

        Conversion to user format returns a dictionary with each element
        mapping metric name to metric value (a later metric with the same
        name overwrites an earlier one). Conversion to db format returns a
        list of dictionaries, one per metric, as produced by each metric's
        ``convert(kind="db")``.

        Parameters
        ----------
        kind : str
            One of "user" or "db"

        Returns
        -------
        dict or list of dict

        Raises
        ------
        ValueError
            If ``kind`` is neither "user" nor "db".
        """
        if kind == "user":
            metrics = {}
            for metric in self._list:
                metrics.update(metric.convert(kind="user"))
            return metrics
        if kind == "db":
            return [metric.convert(kind="db") for metric in self._list]
        # Previously the exception was constructed but never raised, so an
        # unknown kind surfaced as an UnboundLocalError on the return.
        raise ValueError("Bad kind: {}".format(kind))

    @classmethod
    def from_dict_user(cls, d):
        """Build a list from a user-format dict of metric name to value."""
        metrics = cls()
        for key in d:
            metrics.append(Metric.from_dict({key: d[key]}, kind="user"))
        return metrics

    @classmethod
    def from_list_db(cls, l):
        """Build a list from db-format dicts (one dict per metric)."""
        metrics = cls()
        for item in l:
            metrics.append(Metric.from_dict(item, kind="db"))
        return metrics

    @classmethod
    def from_object(cls, obj):
        """Instantiate MetricList from supported format.

        Tries to detect the underlying format and deal with that
        appropriately. The type of a non-empty list is decided by its first
        element only. Anything unrecognized (including an empty list)
        produces an empty instance.

        Parameters
        ----------
        obj: MetricList, dict, list of dict, or list of Metric
            A MetricList is returned as-is (not copied).
        """
        if isinstance(obj, MetricList):
            return obj
        if isinstance(obj, dict):
            return cls.from_dict_user(obj)
        if isinstance(obj, list) and obj:
            first = obj[0]
            if isinstance(first, dict):
                return cls.from_list_db(obj)
            if isinstance(first, Metric):
                return cls(obj)
        return cls()