# Source code for chocolate.sample.random

import numpy

from ..base import SearchAlgorithm
from .grid import ParameterGrid


class Random(SearchAlgorithm):
    """Random sampler.

    Samples the search space randomly. This sampler will draw random numbers
    for each entry in the database in order to restore the random state for
    reproducibility when used concurrently with other random samplers.

    If all parameters are discrete, the sampling is made without replacement.
    Otherwise, the exploration is conducted independently of conditional
    search space, meaning that each subspace will receive approximately the
    same number of samples.

    Args:
        connection: A database connection object.
        space: The search space to explore. The search space can be either a
            dictionary or a :class:`chocolate.Space` instance.
        crossvalidation: A cross-validation object that handles experiment
            repetition.
        clear_db: If set to :data:`True` and a conflict arises between the
            provided space and the space in the database, completely clear the
            database and set the space to the provided one.
        random_state: Either a :class:`numpy.random.RandomState` instance, an
            object to initialize the random state with or
            :data:`None` in which case the global state is used.
    """
    def __init__(self, connection, space, crossvalidation=None, clear_db=False, random_state=None):
        super(Random, self).__init__(connection, space, crossvalidation, clear_db)

        # When every dimension is discrete, a grid over the space is kept so
        # that points can be sampled without replacement. ``None`` means the
        # space is not fully discrete and points are drawn directly.
        self.subspace_grids = None
        if self.space.isdiscrete():
            self.subspace_grids = ParameterGrid(self.space)

        if isinstance(random_state, numpy.random.RandomState):
            self.random_state = random_state
        elif random_state is None:
            # The numpy.random module itself offers ``rand`` and ``choice``
            # backed by the global random state.
            self.random_state = numpy.random
        else:
            self.random_state = numpy.random.RandomState(random_state)

        # Number of database entries already accounted for in the random
        # stream of this instance (see ``_next``).
        self.rndrawn = 0

    def _next(self, token=None):
        """Retrieve the next random point to test and add it to the database.

        The inserted entry holds the sampled values keyed by parameter name
        plus the content of the token; no loss is recorded at this point. On
        each call random numbers are burnt for the database entries that were
        not drawn by this instance, so that two random samplers running
        concurrently with the same random state don't draw the same points
        and produce reproducible results.

        Args:
            token: Optional dictionary identifying the experiment. A
                non-empty dictionary is updated in place with the
                ``"_chocolate_id"`` of the sampled point.

        Returns:
            A tuple containing the token and the result of calling the search
            space on the sampled point.

        Raises:
            StopIteration: If all dimensions are discrete and all possibilities
                have been sampled.
        """
        i = self.conn.count_results()
        token = token or {}

        # Entries present in the database that this instance has not yet
        # accounted for. Clamped at zero: if the database holds fewer results
        # than were already drawn (e.g. results were removed), there is
        # nothing to burn and a negative size would make ``rand`` raise.
        missed = max(0, i - self.rndrawn)

        if self.subspace_grids is not None:
            # Sample without replacement
            grid_size = len(self.subspace_grids)
            if i >= grid_size:
                raise StopIteration

            # Restore state
            self.random_state.rand(missed)

            drawn = {doc["_chocolate_id"] for doc in self.conn.all_results()}

            # Sorted so that the candidate order, and therefore the draw, is
            # deterministic for a given random state.
            choices = sorted(set(range(grid_size)) - drawn)
            sample = self.random_state.choice(choices)

            # Some dbs don't like numpy.int64
            token.update({"_chocolate_id": int(sample)})
            out = self.subspace_grids[sample]

        else:
            token.update({"_chocolate_id": i})

            # Restore state: one full point is burnt per missed entry
            self.random_state.rand(len(self.space), missed)

            # Sample in [0, 1)^n
            out = self.random_state.rand(len(self.space))

        # The ``i`` existing entries plus the point drawn just now are
        # accounted for.
        self.rndrawn = i + 1

        entry = dict(zip(self.space.names(), out))
        entry.update(token)
        self.conn.insert_result(entry)

        return token, self.space(out)