# -*- coding: utf-8 -*-
import logging
import os
LOGGER = logging.getLogger(__name__)


class LocalManager(object):
    """Load and store datasets as nested dictionaries backed by a local folder.

    A dataset is represented as a ``dict`` that maps every file name to the
    raw ``bytes`` content of that file, and every sub-directory name to
    another ``dict`` with the same layout.

    Args:
        datasets_path (str): Folder that holds one sub-folder per dataset.
        skip_sublevels (bool): If ``True``, a raw ``load`` only reads the
            ``tables`` folder and the ``datasetDoc.json`` file from inside
            the ``<dataset_name>_dataset`` folder. Defaults to ``False``.
    """

    def __init__(self, datasets_path, skip_sublevels=False):
        self.datasets_path = datasets_path
        self.skip_sublevels = skip_sublevels

    @staticmethod
    def _as_directory(path):
        """Return ``path`` as an absolute path ending in exactly one separator.

        The trailing separator makes ``str.startswith`` respect path component
        boundaries, so ``a/tables/`` is not taken for a prefix of
        ``a/tables_old/``.
        """
        path = os.path.abspath(path)
        return path if path.endswith(os.sep) else path + os.sep

    @classmethod
    def load_folder(cls, folder, prefixes):
        """Recursively read the selected contents of ``folder`` into a dict.

        An entry is selected when its path is equal to one of the
        ``prefixes``, is located inside one of them, or is a parent folder of
        one of them (parents must be traversed to reach the prefix).

        Args:
            folder (str): Folder to read.
            prefixes (list): Paths that select what is loaded.

        Returns:
            dict: File names mapped to their ``bytes`` content and folder
            names mapped to nested dicts, with keys in sorted order.
        """
        selected = [cls._as_directory(prefix) for prefix in prefixes]

        data = dict()
        # os.listdir returns names in arbitrary order; sort for a stable result.
        for name in sorted(os.listdir(folder)):
            path = os.path.join(folder, name)
            candidate = cls._as_directory(path)

            # Compare whole path components instead of raw substrings.
            wanted = any(
                candidate.startswith(prefix) or prefix.startswith(candidate)
                for prefix in selected
            )
            if not wanted:
                continue

            if os.path.isdir(path):
                data[name] = cls.load_folder(path, prefixes)
            else:
                with open(path, 'rb') as f:
                    data[name] = f.read()

        return data

    def load(self, dataset_name, raw=False):
        """Load a dataset from ``datasets_path`` as a nested dict.

        Args:
            dataset_name (str): Name of the dataset folder.
            raw (bool): If ``True``, load only the top level entries whose
                name contains ``<dataset_name>_problem`` plus the
                ``<dataset_name>_dataset`` folder (restricted to ``tables``
                and ``datasetDoc.json`` when ``skip_sublevels`` is set).
                If ``False``, load every entry. Defaults to ``False``.

        Returns:
            dict: The dataset contents, as returned by ``load_folder``.

        Raises:
            OSError: If the dataset folder cannot be listed.
        """
        dataset_path = os.path.join(self.datasets_path, dataset_name)
        LOGGER.info('Loading dataset %s', dataset_path)

        entries = os.listdir(dataset_path)
        if raw:
            problem = dataset_name + '_problem'
            selected = [name for name in entries if problem in name]

            dataset = dataset_name + '_dataset'
            if self.skip_sublevels:
                # restrict the dataset sublevels to datasetDoc.json and tables
                selected.append(os.path.join(dataset, 'tables'))
                selected.append(os.path.join(dataset, 'datasetDoc.json'))
            else:
                selected.append(dataset)
        else:
            selected = entries

        prefixes = [os.path.join(dataset_path, name) for name in selected]

        return self.load_folder(dataset_path, prefixes)

    def write(self, dataset, base_dir='', root=True):
        """Write a nested dataset dict below ``datasets_path``.

        Args:
            dataset (dict): File names mapped to ``bytes`` content and folder
                names mapped to nested dicts.
            base_dir (str): Folder, relative to ``datasets_path``, in which
                the contents are written. Defaults to ``''``.
            root (bool): Whether this is the outermost call; only the
                outermost call logs the info message. Nested calls made by
                this method pass ``False``. Defaults to ``True``.

        Raises:
            OSError: If the target folder cannot be created or a file cannot
                be written.
        """
        full_base_dir = os.path.join(self.datasets_path, base_dir)
        if root:
            LOGGER.info('Writing dataset %s', full_base_dir)

        # Create first and inspect afterwards: checking for existence before
        # creating would fail if the folder appeared between the two steps.
        try:
            os.makedirs(full_base_dir)
        except OSError:
            if not os.path.isdir(full_base_dir):
                raise

        for key, value in dataset.items():
            path = os.path.join(base_dir, key)
            if isinstance(value, dict):
                self.write(value, path, False)
            else:
                path = os.path.join(self.datasets_path, path)
                LOGGER.debug("Writing file %s", path)
                with open(path, 'wb') as f:
                    f.write(value)

    def datasets(self):
        """Return the sorted names of the entries found in ``datasets_path``."""
        return sorted(os.listdir(self.datasets_path))

    def exists(self, dataset_name):
        """Return whether ``dataset_name`` exists inside ``datasets_path``."""
        dataset_path = os.path.join(self.datasets_path, dataset_name)
        return os.path.exists(dataset_path)