-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathplot_purity.py
More file actions
102 lines (83 loc) · 4.17 KB
/
plot_purity.py
File metadata and controls
102 lines (83 loc) · 4.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import os
import argparse
from itertools import product
def plotfinale2(file):
"""This function plot in a 2D histogram the data in the
_MC files with correct PDGcode and not"""
# pdgcode
pdg = {'electrons': 11, 'pi': 211, 'kaons': 321, 'protons': 2212,
'He3': 1000020030, 'triton': 1000010030, 'deuterons': 1000010020}
# keys for dictionary
keys = list(map(lambda x: x.split('_')[0], file))
# dictionary for data
data = dict(zip(keys, file))
# create dataframe for dictionaries
for k in data:
data[k] = pd.read_parquet(data[k])
# data dataframe
data_pure = {}
# correct pdg code
for specie in data:
new_df = data[specie].query('PDGcode == {0}'.format(pdg[specie]))
data_pure.update({specie: new_df})
# raw data plot
ptraw = {}
ptpure = {}
for k in data:
ptraw[k] = data[k].loc[:, 'p'].values
ptpure[k] = data_pure[k].loc[:, 'p'].values
#width for hist
width3 = [0.3, 0.5, 0.75, 1, 1.5, 3, 5, 10]
width2 = [0.3, 0.5, 0.75, 1, 1.5, 3, 5, 10]
width = np.arange(0,6,0.1)
#labels and colors
color = ['royalblue', 'orangered', 'red', 'forestgreen', 'black', 'mediumslateblue', 'slategray']
labels = {'electrons': 'electrons', 'pi': 'pions', 'kaons': 'kaons', 'protons': 'protons','He3': 'He', 'triton': 'triton', 'deuterons': 'deuterons'}
#pure data vs raw percentage, dividing species in two groups for better visualization
np.seterr(divide='ignore', invalid='ignore')
xer, x = [], []
for binposition in range(len(width3)-1):
xer.append((width3[binposition+1]-width3[binposition])/2)
x.append((width3[binposition]+width3[binposition+1])/2)
_, axes = plt.subplots(2, 4, figsize=[12, 7])
for ipad, (j, index) in enumerate(zip(labels, product([0,1], [0,1,2,3]))):
dfraw = (pd.DataFrame(ptraw[j], columns=['p']).assign(Bin = lambda x: pd.cut(x.p, bins=width2)).groupby(['Bin']).agg({'p': ['sum']}))
dfpure = (pd.DataFrame(ptpure[j], columns=['p']).assign(Bin=lambda x: pd.cut(x.p, bins=width2)).groupby(['Bin']).agg({'p': ['sum']}))
dfrawcount = (pd.DataFrame(ptraw[j], columns=['p']).assign(Bin = lambda x: pd.cut(x.p, bins=width2)).groupby(['Bin']).agg({'p': ['count']}))
dfpurecount = (pd.DataFrame(ptpure[j], columns=['p']).assign(Bin = lambda x: pd.cut(x.p, bins=width2)).groupby(['Bin']).agg({'p': ['count']}))
yer = np.divide(np.sqrt(np.asarray(dfpurecount)*(1 - np.divide(dfpurecount,dfrawcount))),dfrawcount)
axes[index[0], index[1]].errorbar(x, np.asarray(np.divide(dfpure,dfraw)), xerr = xer , yerr = np.asarray(yer), c=color[ipad], fmt='o', capsize=3, elinewidth=0.5)
axes[index[0], index[1]].set_xlabel('p (GeV/c)')
axes[index[0], index[1]].set_ylabel('purity')
axes[index[0], index[1]].set_xscale('log')
axes[index[0], index[1]].set_ylim((0., 1.2))
axes[index[0], index[1]].set_title(labels[j])
axes.flat[-1].set_visible(False)
plt.tight_layout()
plt.savefig('plot_purity_MC.png')
_, axes2 = plt.subplots(2, 4, figsize=[12, 7])
for ipad, (j, index) in enumerate(zip(labels, product([0,1], [0,1,2,3]))):
axes2[index[0], index[1]].hist(ptraw[j], bins = width, color = color[0], label = 'raw', edgecolor='white')
axes2[index[0], index[1]].hist(ptpure[j], bins = width, color = color[2], label = 'pure', edgecolor='white', alpha = 0.7)
axes2[index[0], index[1]].set_title('{0} sample'.format(labels[j]))
axes2[index[0], index[1]].set_xlabel('p (GeV/c)')
axes2[index[0], index[1]].set_ylabel('counts')
axes2.flat[-1].set_visible(False)
plt.tight_layout()
plt.legend()
plt.show()
# flags
PARSER = argparse.ArgumentParser(description='Arguments to pass')
PARSER.add_argument('dir', metavar='text', default='.',
help='flag to search in directory, remember put all paths!')
ARGS = PARSER.parse_args()
# files
files = [f for f in os.listdir(ARGS.dir) if 'MC.parquet.gzip' in f]
files.remove('kaons_fromTOF_MC.parquet.gzip')
# plot
os.chdir(ARGS.dir)
plotfinale2(files)