Source code for selector.read_files

"""This module contains functions for reading in files for the scenario."""
import re
import warnings
from selector.pool import Parameter, ParamType

import numpy as np

# Lower-case spellings that the parsers in this module read as boolean True.
boolean_yes = ["on", "yes", "true"]
# Lower-case spellings that the parsers in this module read as boolean False.
boolean_no = ["no", "off", "false"]
# Every spelling recognised as a boolean value (false spellings first).
boolean_options = boolean_no + boolean_yes


def get_ta_arguments_from_pcs(para_file):
    """
    Read a file that contains the target algorithm parameters.

    The file follows the `.pcs` format and adheres to the AClib structure.
    Everything after a ``#`` on a line is treated as a comment and empty
    lines are skipped.

    Parameters
    ----------
    para_file : str
        Path to the `.pcs` file.

    Returns
    -------
    tuple
        - **parameters**: list,
          information on the parameters of the target algorithm.
        - **no_goods**: list,
          information about forbidden parameter value combinations.
        - **conditionals**: dict,
          maps a conditional parameter name to a dict of
          ``{parent parameter: allowed parent values}``.

    Raises
    ------
    ValueError
        If a line is neither a categorical, continuous, forbidden nor
        conditional declaration.
    """
    no_goods = []
    parameters = []
    conditionals = {}

    with open(para_file, 'r') as pf:
        for raw_line in pf:
            # Remove the comment first and strip afterwards, so whitespace
            # in front of a trailing comment does not survive.
            line = raw_line.split("#", 1)[0].strip()
            if not line:
                continue

            line_split = line.split(None, 1)
            param_name = line_split[0]
            # A forbidden clause written without spaces, e.g. "{a=1,b=2}",
            # is a single token and therefore has no info part.
            param_info = line_split[1] if len(line_split) > 1 else ""

            if "|" in param_info:
                # conditionals
                condition_param, condition = get_conditional(param_name, param_info, parameters)
                conditionals.setdefault(param_name, {})[condition_param] = condition
            elif '{' in param_info:
                # cat
                param_type, bounds, defaults, original_bound = get_categorical(param_name, param_info)
                # A type of None marks a parameter that is not configurable.
                if param_type is not None:
                    parameters.append(Parameter(param_name, param_type, bounds, defaults, {}, '',
                                                original_bound))
            elif '{' in param_name:
                # forbidden
                no_goods.append(get_no_goods(line, parameters))
            elif '[' in param_info:
                # cont.
                param_type, bounds, defaults, scale = get_continuous(param_name, param_info)
                parameters.append(Parameter(param_name, param_type, bounds, defaults, {}, scale, []))
            else:
                raise ValueError(f"The parameter file {para_file} contains unreadable elements. Check that"
                                 f" the structure adheres to AClib")

    # adding conditionals to parameters
    for pc, conditions in conditionals.items():
        # Escape the name so characters such as "." or "+" are matched
        # literally instead of being interpreted as regex syntax.
        name_pattern = re.compile(r'\b' + re.escape(str(pc)) + r'\b')
        condition_found = False
        for parameter in parameters:
            if name_pattern.search(parameter.name):
                parameter.condition.update(conditions)
                condition_found = True
        # This should only be a warning: We may have conditions for cat.
        # parameters that are not configurable. We ignore these.
        if not condition_found:
            warnings.warn(f"Condition {pc}|{conditionals[pc]} will be dropped since either {pc} is "
                          f"not configurable or does not exist")

    return parameters, no_goods, conditionals
def get_categorical(param_name, param_info):
    """
    Parse and validate the declaration of a categorical parameter.

    The declaration is expected to look like ``{v1, v2, ...}[default]``.
    If every value is a boolean spelling (see ``boolean_options``) the
    parameter is returned with boolean bounds and a boolean default.

    Parameters
    ----------
    param_name : str
        Name of the parameter.
    param_info : str
        Raw parameter declaration (everything after the name).

    Returns
    -------
    tuple
        - **param_type** : ParamType or None,
          ``ParamType.categorical``; None if only one value was given.
        - **bounds** : list or None,
          Values of the parameter; sorted booleans for boolean parameters.
        - **defaults** : str, bool or None,
          Default value of the parameter.
        - **original_bound** : list,
          Original string values for boolean parameters, otherwise empty.

    Raises
    ------
    ValueError
        If bounds or default cannot be parsed, or the default is not one of
        the allowed values.
    """
    bounds_match = re.search(r'\{(.*)\}', param_info)
    if bounds_match is None:
        raise ValueError(f"For parameter {param_name} no bounds of the form {{...}} could be parsed")
    bounds = [b.replace(" ", "") for b in bounds_match.group().strip("{ }").split(",")]

    if len(bounds) == 1:
        warnings.warn(f"For parameter {param_name} bounds of length 1 were passed. The parameter will "
                      f"be ignored for configuration.")
        return None, None, None, []

    # The default is the first bracketed group after the value list.
    default_match = re.search(r'\[([^\]]*)\]', param_info[bounds_match.end():])
    if default_match is None:
        raise ValueError(f"For parameter {param_name} no default value could be parsed")
    # Normalise the default like the bounds so that "[ a ]" matches "a".
    default = default_match.group(1).replace(" ", "")

    # Only treat the parameter as boolean if *all* values are boolean
    # spellings; otherwise values such as "auto" would be coerced to False.
    if all(b in boolean_options for b in bounds):
        if default in boolean_yes:
            bool_default = True
        elif default in boolean_no:
            bool_default = False
        else:
            raise ValueError(f"For parameter {param_name} the parsed defaults are not within [yes, no, on, off]")
        bool_bounds = sorted(b in boolean_yes for b in bounds)
        return ParamType.categorical, bool_bounds, bool_default, bounds

    if default not in bounds:
        raise ValueError(f"For parameter {param_name} the default value is not within the range of the bounds")

    return ParamType.categorical, bounds, default, []
def get_continuous(param_name, param_info):
    """
    Parse and validate the declaration of a numerical parameter.

    The declaration is expected to look like ``[min, max] [default]``,
    optionally accompanied by letter flags outside the brackets. A flag
    text containing ``i`` marks an integer parameter; the remaining letters
    are returned as the scale.

    Parameters
    ----------
    param_name : str
        Name of the parameter.
    param_info : str
        Raw parameter declaration (everything after the name).

    Returns
    -------
    tuple
        - **param_type** : ParamType,
          ``ParamType.integer`` or ``ParamType.continuous``.
        - **bounds** : list,
          Parsed bounds of the parameter.
        - **defaults** : int or float,
          Default value of the parameter.
        - **scale** : str,
          Flag text without ``i`` for integers, the flag text itself for
          continuous parameters, ``''`` if no flag was given.

    Raises
    ------
    ValueError
        If bounds or default are missing, not numeric, or the default lies
        outside the bounds.
    """
    bracket_pattern = r'\[[^\]]*]'
    groups = re.findall(bracket_pattern, param_info)
    if len(groups) < 2:
        raise ValueError(f"For parameter {param_name} bounds and a default of the form "
                         f"[min, max] [default] are required")

    # Look for flags only outside the brackets: numbers such as "1e-5" or
    # "inf" contain letters that must not be mistaken for a flag.
    flag_match = re.search(r'[a-zA-Z]+', re.sub(bracket_pattern, ' ', param_info))
    flag_text = flag_match.group() if flag_match else ''
    is_integer = "i" in flag_text

    raw_bounds = groups[0].strip("[] ").split(",")
    raw_default = groups[1].strip("[] ")
    if len(raw_bounds) < 2:
        raise ValueError(f"For parameter {param_name} a lower and an upper bound are required")

    if is_integer:
        convert, kind = int, "integer"
        scale = flag_text.strip("i")
    else:
        convert, kind = float, "continuous"
        scale = flag_text

    try:
        defaults = convert(raw_default)
    except ValueError as err:
        raise ValueError(f"For parameter {param_name} the parsed defaults are not {kind}") from err
    try:
        bounds = [convert(b) for b in raw_bounds]
    except ValueError as err:
        raise ValueError(f"For parameter {param_name} the parsed bounds are not {kind}") from err

    if not bounds[0] <= defaults <= bounds[1]:
        raise ValueError(f"For parameter {param_name} the default value is not within the range of the bounds")

    param_type = ParamType.integer if is_integer else ParamType.continuous
    return param_type, bounds, defaults, scale
def get_conditional(param_name, param_info, parameters):
    """
    Parse a conditional declaration of the form ``| parent in {v1, v2}``.

    The values are converted according to the type of the parent
    parameter; if the first value is a boolean spelling, all values are
    converted to booleans instead.

    Parameters
    ----------
    param_name : str
        Name of the conditional (child) parameter.
    param_info : str
        Raw condition text (everything after the child name).
    parameters : list
        Parameters parsed so far; used to look up the parent's type.

    Returns
    -------
    tuple
        - **condition_param** : str,
          Name of the parent parameter.
        - **condition** : list,
          Parent values for which the child is active.

    Raises
    ------
    ValueError
        If the condition cannot be parsed or the parent parameter is
        unknown and the values are not boolean.
    """
    param_info = param_info.strip(" | ")

    values_match = re.search(r'\{(.*)\}', param_info)
    parent_match = re.search(r'.+?(?= in)', param_info)
    if values_match is None or parent_match is None:
        raise ValueError(f"For parameter {param_name} the parsed conditions could not be read")

    # Strip each value: "{no, yes}" must yield "yes", not " yes".
    condition = [c.strip() for c in values_match.group().strip("{ }").split(",")]
    condition_param = parent_match.group().replace(" ", "")

    # None means the parent is not among the parsed parameters.
    p_type = None
    for p in parameters:
        if p.name == condition_param:
            p_type = p.type

    if condition[0] in boolean_options:
        condition = [c in boolean_yes for c in condition]
    elif p_type == ParamType.categorical:
        condition = [str(c) for c in condition]
    elif p_type == ParamType.continuous:
        condition = [float(c) for c in condition]
    elif p_type == ParamType.integer:
        condition = [int(c) for c in condition]
    else:
        raise ValueError(f"For parameter {param_name} the parsed conditions could not be read")

    return condition_param, condition
def get_no_goods(no_good, parameters):
    """
    Convert a forbidden clause into a dictionary.

    Takes a string of the form: {param_1=value_1 , param_2=value_2, ...}
    and returns a dictionary of the no good. Boolean spellings become
    booleans; other values are converted according to the type of the
    named parameter.

    Parameters
    ----------
    no_good : str
        Takes a string of the form: {param_1=value_1 , param_2=value_2, ...}
    parameters : list
        Parameters parsed so far; used to look up each parameter's type.

    Returns
    -------
    dict
        A dictionary of the no good, mapping parameter name to the
        forbidden value.

    Raises
    ------
    ValueError
        If an entry is not of the form ``name=value`` or the value is not
        boolean and the parameter is unknown.
    """
    # Later declarations win, matching a scan over all parameters.
    types_by_name = {p.name: p.type for p in parameters}

    forbidden = {}
    for assignment in no_good.strip("{ }").split(","):
        param, separator, value = assignment.partition("=")
        if not separator or "=" in value:
            raise ValueError(f"For no good {no_good} the entry '{assignment.strip()}' is not of the "
                             f"form name=value")
        param = param.strip()
        value = value.strip()
        # Looked up per entry so that an unknown parameter never inherits
        # the type of the previous one.
        p_type = types_by_name.get(param)

        if value in boolean_yes:
            value = True
        elif value in boolean_no:
            value = False
        elif p_type == ParamType.continuous:
            value = float(value)
        elif p_type == ParamType.integer:
            value = int(value)
        elif p_type == ParamType.categorical:
            value = str(value)
        else:
            raise ValueError(f"For no good {no_good} the parameter values are not known")

        forbidden[param] = value

    return forbidden
def read_instance_paths(instance_set_path):
    """
    Read in instances from an AClib instance file.

    Each non-empty line is taken as one instance path. Duplicates are kept
    in the returned list, but a warning is issued for every repeated
    occurrence.

    Parameters
    ----------
    instance_set_path : str
        Path to the instance file.

    Returns
    -------
    list
        List of paths to the instances, in file order.
    """
    with open(instance_set_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        # Blank lines (e.g. a trailing newline) are not instances.
        instance_set = [line for line in stripped_lines if line]

    seen = set()
    for instance in instance_set:
        if instance in seen:
            warnings.warn(f"Instance {instance} is not unique in the train set")
        else:
            seen.add(instance)

    return instance_set
def read_instance_features(feature_set_path):
    """
    Read in features from an AClib features file.

    The file is comma separated. The first line is a header whose first
    column is ignored; every following line holds an instance name followed
    by its feature values. Lines with an empty first column are skipped.

    Parameters
    ----------
    feature_set_path : str
        Path to the feature file.

    Returns
    -------
    tuple
        - **features** : dict,
          Maps the instance name to a ``numpy.single`` array of its features.
        - **feature_names** : list,
          List with the feature names.
          Both are empty if the file is empty.
    """
    with open(feature_set_path, 'r') as f:
        lines = f.readlines()

    # An empty file has no header line to read the feature names from.
    if not lines:
        return {}, []

    feature_names = lines[0].strip().split(",")[1:]

    features = {}
    for line in lines[1:]:
        instance, *values = line.strip().split(",")
        if instance != "":
            features[instance] = np.array(values, dtype=np.single)

    return features, feature_names