Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions cobra/evaluation/pigs_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
with plt.style.context("seaborn-ticks"):
fig, ax = plt.subplots(figsize=dim)

# -----------------
# --------------------------
# Left axis - average target
# -----------------
# --------------------------
ax.plot(df_plot['label'], df_plot['avg_target'],
color="#00ccff", marker=".",
markersize=20, linewidth=3,
Expand All @@ -168,7 +168,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
ax.set_ylabel('incidence' if model_type == "classification" else "mean target value",
fontsize=16)
ax.set_xlabel('{} bins' ''.format(variable), fontsize=16)
ax.xaxis.set_tick_params(rotation=45, labelsize=14)
ax.xaxis.set_tick_params(labelsize=14)
plt.setp(ax.get_xticklabels(),
rotation=45, ha="right", rotation_mode="anchor")
ax.yaxis.set_tick_params(labelsize=14)

if model_type == "classification":
Expand All @@ -177,6 +179,24 @@ def plot_incidence(pig_tables: pd.DataFrame,
ax.set_yticks(np.arange(0, max(df_plot['avg_target'])+0.05, 0.05))
ax.yaxis.set_major_formatter(
FuncFormatter(lambda y, _: '{:.1%}'.format(y)))
elif model_type == "regression":
# If the difference between the highest avg target of all bins and the
# global avg target AND the difference between the lowest avg target
# and the global avg target are both smaller than 25% of the global
# avg target itself, we increase the y axis range. This prevents the
# minor avg target differences from being spread out over the
# configured figure height, which would incorrectly suggest that
# there are big differences in avg target across the bins and versus
# the global avg target.
# (Motivation for the AND above: if on one end there IS enough
# difference, the effect that we discuss here does not occur.)
global_avg_target = max(df_plot['global_avg_target']) # series of same number, for every bin.
if (np.abs((max(df_plot['avg_target']) - global_avg_target))
/ global_avg_target < 0.25) \
and (np.abs((min(df_plot['avg_target']) - global_avg_target))
/ global_avg_target < 0.25):
ax.set_ylim(global_avg_target * 0.75,
global_avg_target * 1.25)

# Remove ticks but keep the labels
ax.tick_params(axis='both', which='both', length=0)
Expand All @@ -192,13 +212,13 @@ def plot_incidence(pig_tables: pd.DataFrame,
align='center', color="#939598", zorder=1)

# Set labels & ticks
ax2.set_ylabel('population size', fontsize=16)
ax2.set_xlabel('{} bins' ''.format(variable), fontsize=16)
ax2.xaxis.set_tick_params(rotation=45, labelsize=14)

ax2.yaxis.set_tick_params(labelsize=14)
ax2.yaxis.set_major_formatter(
FuncFormatter(lambda y, _: '{:.1%}'.format(y)))

ax2.set_ylabel('population size', fontsize=16)
ax2.tick_params(axis='y', colors="#939598")
ax2.yaxis.label.set_color('#939598')

Expand Down