Source code for d3mdm.local

# -*- coding: utf-8 -*-

import logging
import os

# Module-level logger, named after this module; LocalManager reports its
# dataset load/write activity through it.
LOGGER = logging.getLogger(__name__)


class LocalManager(object):
    """Read and write datasets stored as folder trees on the local disk.

    A dataset is represented in memory as a nested ``dict``: each key is a
    file or folder name, each value is either the raw ``bytes`` of a file
    or another ``dict`` for a sub-folder.

    Args:
        datasets_path: Folder that contains one sub-folder per dataset.
        skip_sublevels: When true, a raw ``load`` restricts the
            ``<name>_dataset`` folder to its ``tables`` and
            ``datasetDoc.json`` entries.
    """

    def __init__(self, datasets_path, skip_sublevels=False):
        self.datasets_path = datasets_path
        self.skip_sublevels = skip_sublevels

    @classmethod
    def load_folder(cls, folder, prefixes):
        """Recursively load the entries of ``folder`` selected by ``prefixes``.

        An entry is selected when any prefix is contained in its path (the
        entry is at or below a requested prefix) or when its path is
        contained in a prefix (the entry is an ancestor that has to be
        descended into to reach a requested prefix).

        NOTE: the comparison is plain substring containment on the path
        strings, not a path-component comparison.

        Args:
            folder: Path of the folder to read.
            prefixes: Iterable of path strings used to select entries.

        Returns:
            dict: Entry names mapped to file contents (``bytes``) or, for
            directories, to a nested dict built the same way.
        """
        data = {}
        for name in os.listdir(folder):
            path = os.path.join(folder, name)
            if not any(prefix in path or path in prefix for prefix in prefixes):
                continue

            if os.path.isdir(path):
                data[name] = cls.load_folder(path, prefixes)
            else:
                with open(path, 'rb') as f:
                    data[name] = f.read()

        return data

    def load(self, dataset_name, raw=False):
        """Load a dataset from ``datasets_path`` into a nested dict.

        Args:
            dataset_name: Name of the dataset folder inside
                ``datasets_path``.
            raw: When true, load only the entries whose name contains
                ``<dataset_name>_problem`` plus the
                ``<dataset_name>_dataset`` folder (restricted further when
                ``skip_sublevels`` is set). Otherwise load every entry of
                the dataset folder.

        Returns:
            dict: The selected content, as produced by ``load_folder``.
        """
        dataset_path = os.path.join(self.datasets_path, dataset_name)
        LOGGER.info('Loading dataset %s', dataset_path)

        if raw:
            problem = dataset_name + '_problem'
            problems = [name for name in os.listdir(dataset_path) if problem in name]

            dataset = dataset_name + '_dataset'
            if self.skip_sublevels:
                # restrict the dataset sublevels to datasetDoc.json and tables
                dataset_sublevels = [
                    os.path.join(dataset, 'tables'),
                    os.path.join(dataset, 'datasetDoc.json')
                ]
                prefixes = problems + dataset_sublevels
            else:
                prefixes = problems + [dataset]
        else:
            prefixes = os.listdir(dataset_path)

        # load_folder compares against full paths, so anchor every prefix
        # at the dataset folder.
        prefixes = [os.path.join(dataset_path, prefix) for prefix in prefixes]

        return self.load_folder(dataset_path, prefixes)

    def write(self, dataset, base_dir='', root=True):
        """Write a nested dataset dict below ``datasets_path``.

        Args:
            dataset: Dict mapping names to ``bytes`` (written as files) or
                to nested dicts (written as sub-folders).
            base_dir: Folder, relative to ``datasets_path``, that receives
                the entries of ``dataset``. It is created if missing.
            root: Whether this is the outermost call; only the outermost
                call logs the "Writing dataset" message. Recursive calls
                pass ``False``.

        Raises:
            OSError: If the target folder cannot be created or a file
                cannot be written.
        """
        full_base_dir = os.path.join(self.datasets_path, base_dir)
        if root:
            LOGGER.info('Writing dataset %s', full_base_dir)

        # Create first and tolerate "already there" instead of checking for
        # existence beforehand, which could race with another writer.
        try:
            os.makedirs(full_base_dir)
        except OSError:
            if not os.path.isdir(full_base_dir):
                raise

        for key, value in dataset.items():
            path = os.path.join(base_dir, key)
            if isinstance(value, dict):
                self.write(value, path, False)
            else:
                path = os.path.join(self.datasets_path, path)
                LOGGER.debug("Writing file %s", path)
                with open(path, 'wb') as f:
                    f.write(value)

    def datasets(self):
        """Return the sorted list of entry names found in ``datasets_path``."""
        return sorted(os.listdir(self.datasets_path))

    def exists(self, dataset_name):
        """Return whether ``dataset_name`` exists inside ``datasets_path``."""
        dataset_path = os.path.join(self.datasets_path, dataset_name)
        return os.path.exists(dataset_path)