Source code for respyabc.models

"""This module contains the model functions that can be passed to
the :func:`respyabc.respyabc()` function and functions that
support these models."""

import numpy as np


def compute_model(
    parameter,
    model_to_simulate,
    parameter_for_simulation,
    options_for_simulation,
    descriptives="choice_frequencies",
):
    """Compute K&W 1994 model.

    Function is a wrapper around the `respy.get_simulate_func()` function to
    compute the model using the parameters from Keane & Wolpin 1994 but being
    able to vary over the parameters.

    Parameters
    ----------
    parameter : dict
        A dictionary containing the variables as key and the corresponding
        magnitude as value. Keys must match the single-index labels
        ``"<category>_<name>"`` of `parameter_for_simulation`.

    model_to_simulate : func
        Function produced by
        :func:`respyabc.respyabc.get_simulate_func_options()`.
        Model that specifies the respy set-up. It is called as
        ``model_to_simulate(parameter_for_simulation,
        options=options_for_simulation)``.

    parameter_for_simulation : pandas.DataFrame
        Parameters that specify the respy model, indexed by the levels
        ``"category"`` and ``"name"``. Its ``"value"`` column is overwritten
        **in place**.

    options_for_simulation : pandas.DataFrame
        Options that specify the respy model. The entries
        ``"simulation_seed"``, ``"solution_seed"`` and ``"estimation_seed"``
        are overwritten **in place** with fresh random seeds on every call.

    descriptives : {``"choice_frequencies"``, ``"wage_moments"``}, optional
        Determines how the descriptives with which the distance is computed
        are computed. The default is ``"choice_frequencies"``.

    Returns
    -------
    output : dict
        A dictionary with the single key ``"data"`` holding a numpy array of
        either the relative choice frequencies per period or the wage moments
        (mean and standard deviation) per period.

    Raises
    ------
    ValueError
        If `descriptives` is not one of the supported options.
    """
    # Fail fast: without this check an unknown option would only surface
    # after the simulation as an UnboundLocalError on the return value.
    valid_descriptives = ("choice_frequencies", "wage_moments")
    if descriptives not in valid_descriptives:
        raise ValueError(
            f"descriptives must be one of {valid_descriptives}, "
            f"got {descriptives!r}."
        )

    # Work on a flat "<category>_<name>" index so that the keys of
    # `parameter` can address single rows directly.
    params_single_index = transform_multiindex_to_single_index(
        df=parameter_for_simulation, column1="category", column2="name", link="_"
    )
    for name, value in parameter.items():
        params_single_index.loc[name, "value"] = value

    # Write the updated values back positionally into the caller's frame.
    parameter_for_simulation["value"] = np.array(params_single_index["value"])

    # Draw new seeds for every evaluation of the model.
    options_for_simulation["simulation_seed"] = np.random.randint(0, 1000000000)
    options_for_simulation["solution_seed"] = np.random.randint(0, 1000000000)
    options_for_simulation["estimation_seed"] = np.random.randint(0, 1000000000)

    df_simulated_model = model_to_simulate(
        parameter_for_simulation, options=options_for_simulation
    )

    if descriptives == "choice_frequencies":
        return compute_choice_frequencies_to_model_output_frequencies(
            df=df_simulated_model
        )

    # Only "wage_moments" remains after the validation above.
    return {"data": np.array(fill_nan(compute_wage_moments(df_simulated_model)))}
def compute_choice_frequencies_to_model_output_frequencies(df):
    """Convert simulated choices into the model's frequency output.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame for which the choice frequencies should be created.

    Returns
    -------
    output_frequencies : dict
        A dictionary with the single key ``"data"`` holding a numpy array of
        the relative frequencies of each choice in each period.
    """
    frequencies = compute_choice_frequencies(df)

    # Guarantee one column per choice, even for choices never observed.
    expected_choices = ("a", "b", "edu", "home")
    absent_choices = [
        choice for choice in expected_choices if choice not in frequencies.columns
    ]
    for choice in absent_choices:
        frequencies[choice] = 0

    # Order the columns by label so the array layout does not depend on
    # which choices were observed.
    frequencies = frequencies.sort_index(axis=1)

    return {"data": np.array(fill_nan(frequencies))}
def compute_choice_frequencies(df):
    """Compute the share of each choice within every period.

    Parameters
    ----------
    df : pandas.DataFrame
        A pandas data frame containing the output of the discrete choice
        model. Must provide the columns ``"Period"`` and ``"Choice"``.

    Returns
    -------
    pandas.DataFrame
        One row per period and one column per observed choice, holding the
        relative choice frequencies. Choices not observed in a period are
        missing values.
    """
    choices_by_period = df.groupby("Period")["Choice"]
    relative_shares = choices_by_period.value_counts(normalize=True)
    # Pivot the inner (choice) index level into columns.
    return relative_shares.unstack()
def fill_nan(df):
    """Replace every missing value of a data frame by zero.

    Parameters
    ----------
    df : pandas.DataFrame
        A pandas data frame containing missing values.

    Returns
    -------
    pandas.DataFrame
        A new data frame with zeros instead of the missing values; the input
        is left unchanged.
    """
    zero_filled = df.fillna(value=0)
    return zero_filled
def compute_wage_moments(df):
    """Compute mean and standard deviation of wages for every period.

    Parameters
    ----------
    df : pandas.DataFrame
        A pandas data frame containing the output of the discrete choice
        model. Must provide the columns ``"Period"`` and ``"Wage"``.

    Returns
    -------
    pandas.DataFrame
        One row per period with the columns ``"mean"`` and ``"std"`` of the
        wages.
    """
    wages_by_period = df.groupby(["Period"])["Wage"]
    summary = wages_by_period.describe()
    # Keep only the first two moments out of the full summary statistics.
    moment_columns = ["mean", "std"]
    return summary.loc[:, moment_columns]
def transform_multiindex_to_single_index(df, column1, column2, link="_"):
    """Collapse a two-level multiindex into one concatenated string index.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas data frame with a multiindex.

    column1 : str
        Name of first multiindex column.

    column2 : str
        Name of second multiindex column.

    link : str, optional
        String that is used to separate the two multiindex columns within
        the new string. The default is ``"_"``.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` indexed by ``<column1 label><link><column2 label>``;
        the input frame is left unchanged.
    """
    first_level = df.index.get_level_values(column1)
    second_level = df.index.get_level_values(column2)
    joined_labels = first_level + link + second_level

    # Drop the old multiindex entirely, then attach the flat labels.
    return df.reset_index(drop=True).set_index(joined_labels)