# Source code for plot_histograms

"""
Plot and save histograms of measurement frequency per variable category
"""
try:
    from bedPyLoad.categorize import *
except Exception as e:
    print('Import incomplete - errors likely:\n' + str(e))


def plot_category_histograms(directory):
    """Plot and save bar charts of measurement counts per category.

    Loops over the keys of ``FULL_LABEL_DICT`` (as per the config.py). Each
    key is used as a column name of the module-level dataframe ``df``.
    Columns of float dtype are skipped, and so are columns that have no
    entry in ``SUB_CATEGORY_ORDER``. For every remaining column one
    log-scaled bar chart is written to ``<directory>/<column>.png``.

    :param str directory: where plots should be saved; either an absolute
        or a relative path. The directory is created if it does not exist.
        A trailing '/' is accepted but not required.
        Important: relative paths should not start with '/' or '\\\\'
    :return: 0 if successful
    :rtype: int
    """
    os.makedirs(directory, exist_ok=True)

    # A seaborn style only affects axes created after this call, so set it
    # once up front; otherwise the first figure is styled differently.
    sns.set_style('whitegrid')  # options: darkgrid, whitegrid, dark, white, ticks

    for par in FULL_LABEL_DICT:
        # check if categorical or numeric Series to define bins
        df4a = df[par].dropna()
        if df4a.dtype == float:
            print('...skipping numeric type: ' + par)
            continue
        print('PLOTTING: ' + par)

        # Resolve the x-labels before creating a figure, so that skipping a
        # column never leaves an orphaned open figure behind.
        try:
            configured_order = SUB_CATEGORY_ORDER[par]
        except KeyError:
            # skip irrelevant undefined entries in SUB_CATEGORY_ORDER dict
            continue
        if configured_order:
            cat_order = configured_order
        else:
            # empty entry: fall back to the order of first appearance
            cat_order = df4a.unique().tolist()

        # make ys (number of occurrences); a configured category that never
        # occurs in the data counts as 0 instead of raising KeyError
        freq_dict = df4a.value_counts().to_dict()
        frequencies = [freq_dict.get(category, 0) for category in cat_order]

        # agglomerate into df
        plot_df = pd.DataFrame(
            list(zip(cat_order, frequencies)),
            columns=['Categories', 'Number of Measurements'],
        )

        fig, ax = plt.subplots(1, 1, figsize=(12, 6))
        try:
            sns.barplot(
                data=plot_df,
                x='Categories',
                y='Number of Measurements',
                palette='blend:#7AB,#EDA',
                edgecolor='.3',
                orient='v',
                ax=ax,
                alpha=0.5,
            )
            ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
            ax.grid(visible=True, which='minor', color=[0.88, 0.88, 0.88], linewidth=0.5)
            ax.set(xlabel=None, ylim=(1, 10000))
            ax.set_yscale('log')
            plt.xticks(rotation=45)  # rotate too-long xlabels

            out_file = os.path.join(directory, '%s.png' % par)
            fig.savefig(out_file, dpi=600, bbox_inches='tight')
            print('- saved: ' + out_file)
        finally:
            # Close this specific figure, also when plotting or saving fails,
            # so figures do not pile up across the loop.
            plt.close(fig)

    return 0
if __name__ == '__main__':
    # Script entry point: write all category histograms to the default
    # output folder, given relative to the current working directory.
    plot_category_histograms('figures/histograms/')