# Source code for chocolate.connection.pandas

from contextlib import contextmanager
import pickle

import numpy
import pandas

from ..base import Connection


class DataFrameConnection(Connection):
    """Connection to a pandas DataFrame.

    This connection is meant for situations where it is not possible to use
    the file system or another type of traditional database (e.g. a
    `Kaggle <http://kaggle.com>`_ script) and absolutely not in concurrent
    processes. In fact, using this connection in different processes will
    result in two independent searches **not** sharing any information.

    Args:
        from_file: The name of a file containing a pickled data frame
            connection.

    Raises:
        TypeError: If the object unpickled from *from_file* is not a
            :class:`DataFrameConnection`.

    Using this connection requires small adjustments to the proposed main
    script. When the main process finishes, all data will vanish if not
    explicitly written to disk. Thus, instead of doing a single evaluation,
    the main process will incorporate a loop calling the search/sample
    ``next`` method multiple times. Additionally, at the end of the
    experiment, either extract the best configuration using
    :meth:`results_as_dataframe` or write all the data using :mod:`pickle`.
    """

    def __init__(self, from_file=None):
        if from_file is not None:
            # NOTE: unpickling can execute arbitrary code. Only load files
            # that come from a trusted source.
            with open(from_file, "rb") as f:
                # pickle.load expects the file object itself, not its bytes.
                conn = pickle.load(f)

            if not isinstance(conn, DataFrameConnection):
                raise TypeError("Unpickled connection is not of type DataFrameConnection")

            self.results = conn.results
            self.complementary = conn.complementary
            self.space = conn.space
        else:
            self.results = pandas.DataFrame()
            self.complementary = pandas.DataFrame()
            self.space = None

    @staticmethod
    def _append_row(frame, document):
        """Return a new frame made of *frame* followed by *document* as a row."""
        row = pandas.DataFrame([document])
        if frame.shape == (0, 0):
            # Nothing to merge with yet; the new row is the whole table.
            return row
        return pandas.concat([frame, row], ignore_index=True)

    @staticmethod
    def _select(frame, criteria):
        """Return the rows of *frame* whose columns equal every value of *criteria*."""
        for column, expected in criteria.items():
            frame = frame[frame[column] == expected]
        return frame

    @staticmethod
    def _records(frame):
        """Return the rows of *frame* as a list of ``{column: value}`` dicts."""
        return frame.to_dict(orient="records")

    @contextmanager
    def lock(self, *args, **kwargs):
        """This function does not lock anything. Do not use in concurrent
        processes.
        """
        yield

    def all_results(self):
        """Get a list of all entries of the result table, one dict per row.

        Callers should not rely on the order of the entries.
        """
        return self._records(self.results)

    def find_results(self, filter):
        """Get a list of all results associated with *filter*.

        Args:
            filter: A mapping of column names to the value a row must hold
                in that column to be selected.

        Callers should not rely on the order of the entries. A filter key
        that is not a column of the result table raises :exc:`KeyError`.
        """
        return self._records(self._select(self.results, filter))

    def insert_result(self, document):
        """Insert a new *document* in the result data frame.

        The columns do not need to be defined nor all present. Any new
        column will be added to the database and any missing column will
        get a missing value (NaN).
        """
        self.results = self._append_row(self.results, document)

    def update_result(self, document, value):
        """Update or add *value* of given rows in the result data frame.

        Args:
            document: An identifier of the rows to update.
            value: A mapping of values to update or add.
        """
        size = len(self.results.index)

        # Start with every row selected and narrow down with each criterion.
        selection = numpy.ones(size, dtype=bool)
        for k, v in document.items():
            selection &= (self.results[k] == v).to_numpy()

        for k, v in value.items():
            if k not in self.results:
                # Create the column on the frame's own index so that rows
                # outside the selection hold None.
                self.results[k] = pandas.Series(
                    [None] * size, index=self.results.index, dtype=object
                )
            self.results.loc[selection, k] = v

    def count_results(self):
        """Get the total number of entries in the result table."""
        return len(self.results.index)

    def all_complementary(self):
        """Get all entries of the complementary information table as a list.

        Callers should not rely on the order of the entries.
        """
        return self._records(self.complementary)

    def insert_complementary(self, document):
        """Insert a new document (row) in the complementary information data
        frame.
        """
        self.complementary = self._append_row(self.complementary, document)

    def find_complementary(self, filter):
        """Find a document (row) from the complementary information data
        frame.

        Args:
            filter: A mapping of column names to the value a row must hold
                in that column to be selected.

        Returns:
            The first matching row as a dict.

        Raises:
            IndexError: If no row matches *filter*.
        """
        return self._records(self._select(self.complementary, filter))[0]

    def get_space(self):
        """Returns the space used for previous experiments."""
        return self.space

    def insert_space(self, space):
        """Insert a space in the database.

        Raises:
            AssertionError: If a space is already present.
        """
        # Raised explicitly rather than with an ``assert`` statement so the
        # check is still enforced when Python runs with ``-O``.
        if self.space is not None:
            raise AssertionError(
                "Space table cannot contain more than one space, clear table first."
            )
        self.space = space

    def clear(self):
        """Clear all data."""
        self.results = pandas.DataFrame()
        self.complementary = pandas.DataFrame()
        self.space = None

    def pop_id(self, document):
        """Pops the database unique id from the document.

        This connection stores documents as given, without adding an id of
        its own, so *document* is returned unchanged.
        """
        return document