Source code for chocolate.sample.random

import numpy

from ..base import SearchAlgorithm
from .grid import ParameterGrid


class Random(SearchAlgorithm):
    """Random sampler.

    Samples the search space randomly. Before every draw, random numbers
    are burnt according to the number of entries found in the database, in
    order to restore the random state for reproducibility when used
    concurrently with other random samplers.

    If all parameters are discrete, the sampling is made without
    replacement. Otherwise, the exploration is conducted independently of
    conditional search space, meaning that each subspace will receive
    approximately the same number of samples.

    Args:
        connection: A database connection object.
        space: The search space to explore. The search space can be either
            a dictionary or a :class:`chocolate.Space` instance.
        crossvalidation: A cross-validation object that handles experiment
            repetition.
        clear_db: If set to :data:`True` and a conflict arises between the
            provided space and the space in the database, completely clear
            the database and set the space to the provided one.
        random_state: Either a :class:`numpy.random.RandomState` instance,
            an object to initialize the random state with or :data:`None`
            in which case the global state is used.
    """

    def __init__(self, connection, space, crossvalidation=None, clear_db=False, random_state=None):
        super(Random, self).__init__(connection, space, crossvalidation, clear_db)

        # A grid is only built when every dimension is discrete; its
        # presence is what switches `_next` to sampling without replacement.
        self.subspace_grids = ParameterGrid(self.space) if self.space.isdiscrete() else None

        if random_state is None:
            # No state provided: rely on numpy's module-level generator.
            self.random_state = numpy.random
        elif isinstance(random_state, numpy.random.RandomState):
            self.random_state = random_state
        else:
            # Anything else is handed to RandomState as its initializer.
            self.random_state = numpy.random.RandomState(random_state)

        # Number of database entries this instance has already accounted
        # for in its random stream (including its own draws).
        self.rndrawn = 0

    def _next(self, token=None):
        """Retrieve the next random point to test and add it to the
        database.

        On each call random points are burnt so that two random samplers
        running concurrently with the same random state don't draw the same
        points and produce reproducible results.

        Args:
            token: An optional dictionary that is completed with the
                ``"_chocolate_id"`` of the sampled point. A new dictionary
                is created when it is :data:`None` or empty.

        Returns:
            A tuple containing the token and the result of calling the
            search space on the sampled point.

        Raises:
            StopIteration: If all dimensions are discrete and all
                possibilities have been sampled.
        """
        n_results = self.conn.count_results()
        token = token or {}

        # Entries added to the database since this instance last drew.
        n_to_burn = n_results - self.rndrawn

        if self.subspace_grids is not None:
            # Sample without replacement
            grid_size = len(self.subspace_grids)
            if n_results >= grid_size:
                raise StopIteration

            # Restore state
            self.random_state.rand(n_to_burn)

            taken = {doc["_chocolate_id"] for doc in self.conn.all_results()}
            # Iterating over range keeps the remaining indices sorted.
            remaining = [idx for idx in range(grid_size) if idx not in taken]
            picked = self.random_state.choice(remaining)
            self.rndrawn = n_results + 1

            # Some dbs don't like numpy.int64
            token["_chocolate_id"] = int(picked)
            out = self.subspace_grids[picked]
        else:
            n_dims = len(self.space)
            token["_chocolate_id"] = n_results

            # Restore state
            self.random_state.rand(n_dims, n_to_burn)

            # Sample in [0, 1)^n
            out = self.random_state.rand(n_dims)
            self.rndrawn = n_results + 1

        # Store the raw sampled values together with the token fields.
        entry = dict(zip(self.space.names(), out))
        entry.update(token)
        self.conn.insert_result(entry)

        return token, self.space(out)