Use anajob to study the run

With anajob, we can get a file-level, event-level and collection-level overview.

# Data can be produced with:
# cd /data_ilc/flc/kunath/local_only/eehiq
# source /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02-03/init_ilcsoft.sh
# anajob rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000.d_dstm_15783_0.slcio > anajob.txt
#
# Alternatively, if you have the pyLCIO kernel set up, uncomment the following line:
# !cd /data_ilc/flc/kunath/local_only/eehiq; if [ ! -f anajob.txt ]; then anajob rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000.d_dstm_15783_0.slcio > anajob.txt; else echo "File already exists!"; fi
import io
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
from IPython.display import HTML

# Read the complete anajob text dump into memory; it is parsed further below.
data_folder = "/data_ilc/flc/kunath/local_only/eehiq"
anajob_path = f"{data_folder}/anajob.txt"
with open(anajob_path) as anajob_file:
    raw_txt = anajob_file.read()
def remove_prefix(full_str, prefix):
    """Return ``full_str`` without its leading ``prefix``.

    The prefix is mandatory: an ``AssertionError`` is raised when
    ``full_str`` does not start with it.
    """
    assert full_str.startswith(prefix)
    cut_at = len(prefix)
    remainder = full_str[cut_at:]
    return remainder


def remove_suffix(full_str, suffix):
    """Return ``full_str`` without its trailing ``suffix``.

    The suffix is mandatory: an ``AssertionError`` is raised when
    ``full_str`` does not end with it. An empty suffix leaves the string
    unchanged.
    """
    assert full_str.endswith(suffix)
    # Compute the end position explicitly. The shorter ``full_str[:-len(suffix)]``
    # collapses to ``full_str[:0]`` (an empty string) for an empty suffix.
    return full_str[: len(full_str) - len(suffix)]


class Anajob:
    """Parse the text output of ``anajob`` into pandas objects.

    After construction the instance exposes:

    * ``file_name``, ``n_runs``, ``n_events``: taken from the file summary
      line at the top of the output.
    * ``run_df``: one row per run section of the header, one column per
      run parameter.
    * ``event_header``: DataFrame indexed by ``EVENT`` with the ``RUN`` and
      ``DETECTOR`` of every parsed event.
    * ``df``: the per-event collection tables stacked into one DataFrame,
      with an extra ``EVENT`` column identifying the event of each row.
    * ``n_events_used``: number of events that were actually parsed.
    """

    # Literal markers used to cut the anajob text into header, runs and events.
    _header_tag = "anajob:  will open and read from files: \n\n"
    _header_end_tag = "\n\n will reopen and read from files: \n"
    _run_tag = "\n  Run : "
    _event_tag = "///////////////////////////////////\nEVENT: "

    def __init__(self, raw_string, max_events=-1):
        """Parse ``raw_string``, the complete text written by anajob.

        Args:
            raw_string: Full anajob output for a single file.
            max_events: Number of events to parse into tables. ``-1`` (the
                default) parses all events announced in the file summary.

        Raises:
            AssertionError: If the text does not have the expected layout or
                the parsed event/run counts are inconsistent with the header.
        """
        str_events = raw_string.split(self._event_tag)

        # Everything before the first event marker is the file/run header.
        str_header = str_events.pop(0)
        self.run_df = self._process_header(str_header)
        self.n_events_used = self.n_events if max_events == -1 else int(max_events)
        print(repr(self))

        # Filled as plain lists by _make_event, converted to a DataFrame below.
        self.event_header = {"EVENT": [], "RUN": [], "DETECTOR": []}
        # The last event chunk still carries the closing summary of the output.
        str_events[-1] = remove_suffix(
            str_events[-1],
            f"{self.n_events} events read from files: \n     {self.file_name}\n",
        )
        assert len(str_events) >= self.n_events_used
        self.df = pd.concat(map(self._make_event, str_events[: self.n_events_used]))
        self.event_header = pd.DataFrame(self.event_header).set_index("EVENT")

        n_runs_seen = len(self.event_header["RUN"].unique())
        if self.n_events_used == self.n_events:
            assert self.n_runs == n_runs_seen
        else:
            # Only a subset of the events was parsed, so some runs may not
            # have been encountered yet; more runs than announced is an error.
            assert n_runs_seen <= self.n_runs
        assert self.n_events_used == len(self.event_header)
        assert self.n_events_used == len(self.df["EVENT"].unique())

    def __repr__(self):
        """Return a one-line summary: events used/available and the file name."""
        return "\n".join(
            [
                f"{__class__.__name__} with {self.n_events_used}/{self.n_events} "
                f"events from file {self.file_name}"
            ]
        )

    def _make_event(self, str_event):
        """Parse the text of one event into a collection table.

        ``str_event`` is the text following one event marker. Its event
        number, run number and detector name are appended to the
        ``self.event_header`` lists; the collection table is returned as a
        DataFrame with an added ``EVENT`` column.
        """
        event_lines = str_event.split("\n")
        i_event = int(event_lines.pop(0))
        self.event_header["EVENT"].append(i_event)
        self.event_header["RUN"].append(int(remove_prefix(event_lines.pop(0), "RUN: ")))
        self.event_header["DETECTOR"].append(
            remove_prefix(event_lines.pop(0), "DETECTOR: ")
        )
        event_lines.pop(0)  # COLLECTIONS: (see below)
        event_lines.pop(0)  # ///////////////////////////////////
        event_lines.pop(0)  #
        event_lines.pop(0)  # -----------------------------------
        # Index 0 is now the column-title row; index 1 is the rule below it.
        event_lines.pop(1)  # ===================================
        while event_lines[-1].strip(" ") == "":
            event_lines.pop()
        event_lines.pop()  # -----------------------------------
        # Turn the space-aligned table into tab-separated text: split on
        # double spaces, drop the empty pieces, trim each cell, re-join with
        # tabs and remove the tab left in front of each line break.
        table_str = "\n".join(event_lines).split("  ")
        table_str = filter(None, table_str)
        table_str = map(lambda x: x.strip(" "), table_str)
        table_str = "\t".join(table_str)
        table_str = table_str.replace("\t\n", "\n")
        col_df = pd.read_csv(io.StringIO(table_str), sep="\t")
        col_df["EVENT"] = i_event
        if not i_event % 1000:
            # NOTE(review): print() appends a newline after the "\r", so the
            # progress lines do not overwrite each other; confirm the intent.
            print(f"{i_event:>6}/{self.n_events_used} events\r")
        return col_df

    def _process_header(self, str_header):
        """Parse the part of the output that precedes the first event.

        Sets ``self.file_name``, ``self.n_runs`` and ``self.n_events`` from
        the file summary line and returns a DataFrame with one row per run
        section (see ``_make_run``).
        """
        run_infos = str_header.split(self._run_tag)
        header = run_infos.pop(0)
        assert header.startswith(self._header_tag)
        files = header[len(self._header_tag) :].split("\n")
        # Pop outside the assert so the list is still trimmed under ``-O``.
        trailing_piece = files.pop()
        assert trailing_piece == ""
        assert len(files) == 1
        for str_file in files:
            self.file_name, str_file = str_file.strip().split(maxsplit=1)
            str_file = str_file.strip()
            assert str_file.startswith("[ ") and str_file.endswith(" ]")
            self.n_runs, self.n_events = str_file[2:-2].split(", ")
            self.n_runs = int(remove_prefix(self.n_runs, "number of runs: "))
            self.n_events = int(remove_prefix(self.n_events, "number of events: "))
        # The last run section is followed by the "will reopen" footer.
        run_infos[-1] = remove_suffix(
            run_infos[-1], self._header_end_tag + "     " + self.file_name + "\n"
        )
        run_series = {}
        for i, run_info in enumerate(run_infos):
            run_series[i] = self._make_run(run_info)
        return pd.concat(run_series, axis=1).transpose()

    def _make_run(self, str_run):
        """Parse one run section into a Series of its parameters.

        The first line has the form ``<run number> - <detector>``; every
        following line is ``" parameter <key> [string]: <value>"``. The
        section is closed by a line made of dashes only.
        """
        run_dict = {}
        lines = str_run.split("\n")
        # Pop outside the assert so the rule line is still removed under ``-O``.
        closing_line = lines.pop()
        assert closing_line.strip().strip("-") == ""
        i_run, detector_tag = lines.pop(0).split(" - ")
        run_dict["RUN"] = int(i_run)
        run_dict["DETECTOR"] = detector_tag.rstrip(": ")
        for line in lines:
            key, val = line.split(": ", maxsplit=1)
            key = remove_prefix(key, " parameter ")
            key = remove_suffix(key, " [string]")
            run_dict[key] = val.rstrip(", ")
        return pd.Series(run_dict)


# Parse only the first three events into tables; the run header at the top of
# the anajob output is processed in full regardless of max_events.
print("For the run overview, it is not necessary to read all the events.")
aj = Anajob(raw_txt, max_events=3)
For the run overview, it is not necessary to read all the events.
Anajob with 3/43200 events from file rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000.d_dstm_15783_0.slcio

Run information

We can see that this run consists of 72 parts.

# Per-run parameter table built from the anajob header (one row per run section).
run_df = aj.run_df
# Bare expression: shown as the cell's output.
run_df
RUN DETECTOR CommandLine DD4HEPVersion DateUTC GEANT4Version ILCSoft_location LCIOFileName SteeringFileContent User ... random.file random.luxury random.replace_gRandom random.seed random.type runType skipNEvents steeringFile vertexOffset vertexSigma
0 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:41:59.159231 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilcusr000 ... None 1 True 157824 None batch 0 /var/lib/condor/execute/dir_25888/DIRAC_JCtiDd... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
1 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:43:00.420341 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc032 ... None 1 True 1578221 None batch 0 /pool/condor/dir_87689/DIRAC__LfCKVpilot/60176... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
2 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:43:57.852468 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc033 ... None 1 True 1578229 None batch 0 /home/pool/condor/dir_10221/DIRAC_LMS78spilot/... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
3 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:39:57.254905 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc045 ... None 1 True 157829 None batch 0 /var/lib/condor/execute/dir_29408/DIRAC_VQPEfu... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
4 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:42:11.717918 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc006 ... None 1 True 1578216 None batch 0 /scratch/condor_pool/condor/dir_3789/rJ0NDmECq... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
67 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:42:46.151946 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc032 ... None 1 True 1578278 None batch 0 /pool/condor/dir_265147/DIRAC_bBbVv1pilot/6017... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
68 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:42:28.066133 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc006 ... None 1 True 1578231 None batch 0 /scratch/condor_pool/condor/dir_53522/iZVKDmNC... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
69 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:43:05.691181 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc006 ... None 1 True 1578272 None batch 0 /scratch/condor_pool/condor/dir_181389/flnNDmY... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
70 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:42:19.101171 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc032 ... None 1 True 1578264 None batch 0 /pool/condor/dir_12979/DIRAC_PM8x8fpilot/60176... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]
71 0 ILD_l5_v02 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... v01-11 2022-01-26 05:41:23.246551 UTC $Name: geant4-10-04-patch-03 $ /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02 rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I5000... ##############################################... ilc041 ... None 1 True 1578255 None batch 0 /var/lib/condor/execute/dir_7589/DIRAC_zscCfsp... [0.0, 0.0, 0.0, 0.0] [0.0, 0.0, 0.202, 0.0]

72 rows × 93 columns

Fields with the same value in all runs

# Columns of run_df that hold a single distinct value; read them off run 0.
is_constant = run_df.nunique().values == 1
repeated_info = run_df.T.loc[is_constant, 0]
# The steering file is long, so it is taken out here and printed separately.
steering_file = repeated_info.pop("SteeringFileContent")
_df = pd.DataFrame(
    {
        "field name": repeated_info.index.values,
        "unique value": repeated_info.values,
    }
)
HTML(_df.to_html(index=False))
field name unique value
RUN 0
DETECTOR ILD_l5_v02
DD4HEPVersion v01-11
GEANT4Version $Name: geant4-10-04-patch-03 $
ILCSoft_location /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02
LCIOFileName rv02-02.sv02-02.mILD_l5_o1_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000.d_dstm_15783_0.slcio
_dumpParameter False
_dumpSteeringFile False
_errorMessages []
_g4gps None
_g4gun None
action.calo Geant4ScintillatorCalorimeterAction
action.mapActions {u'tpc': u'TPCSDAction'}
action.tracker (u'Geant4TrackerWeightedAction', {u'HitPositionCombination': 2, u'CollectSingleDeposits': False})
compactFile /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02/lcgeo/v00-16-06/ILD/compact/ILD_l5_v02/ILD_l5_v02.xml
crossingAngleBoost 0.007
enableDetailedShowerMode True
enableG4GPS False
enableG4Gun False
enableGun False
field.delta_chord 1e-05
field.delta_intersection 1e-05
field.delta_one_step 0.0005
field.eps_max 0.0001
field.eps_min 1e-05
field.equation Mag_UsualEqRhs
field.largest_step 10000.0
field.min_chord_step 0.01
field.stepper HelixSimpleRunge
filter.calo edep0
filter.filters {u'edep0': {u'parameter': {u'Cut': 0.0}, u'name': u'EnergyDepositMinimumCut/Cut0'}, u'edep1kev': {u'parameter': {u'Cut': 0.001}, u'name': u'EnergyDepositMinimumCut'}, u'geantino': {u'parameter': {}, u'name': u'GeantinoRejectFilter/GeantinoRejector'}}
filter.mapDetFilter {u'TPC': None}
filter.tracker edep1kev
guineapig._parameters {u'ParticlesPerEvent': '-1'}
guineapig.particlesPerEvent -1
gun.direction (0, 0, 1)
gun.distribution None
gun.energy 10000.0
gun.isotrop False
gun.multiplicity 1
gun.particle mu-
gun.phiMax None
gun.phiMin None
gun.position (0.0, 0.0, 0.0)
gun.thetaMax None
gun.thetaMin None
lcgeo_location /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02/lcgeo/v00-16-06
lcio._parameters {u'MCParticleCollectionName': u'MCParticle'}
lcio.mcParticleCollectionName MCParticle
macroFile
meta.eventNumberOffset 0
meta.eventParameters []
meta.runNumberOffset 0
numberOfEvents 600
output.inputStage 3
output.kernel 3
output.part 3
output.random 6
part.enableDetailedHitsAndParticleInfo False
part.keepAllParticles False
part.minDistToParentVertex 2.2e-14
part.minimalKineticEnergy 1.0
part.printEndTracking False
part.printStartTracking False
part.saveProcesses [u'Decay']
part.userParticleHandler Geant4TCUserParticleHandler
physics.decays False
physics.list QGSP_BERT
physics.pdgfile /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02-02/DD4hep/v01-11-02/DDG4/examples/particle.tbl
physics.rangecut 0.1
physics.rejectPDGs set([1, 2, 3, 4, 5, 6, 21, 23, 24, 25])
physics.zeroTimePDGs set([17, 11, 13, 15])
physicsList None
printLevel 3
random.enableEventSeed False
random.file None
random.luxury 1
random.replace_gRandom True
random.type None
runType batch
skipNEvents 0
vertexOffset [0.0, 0.0, 0.0, 0.0]
vertexSigma [0.0, 0.0, 0.202, 0.0]

Steering file

# The stored steering file contains literal backslash-n sequences; turn them
# into real line breaks for printing.
print(steering_file.replace("\\n", "\n"))
######################################################################
#
#  standard steering file for ILD simulation 
#  
#
#
######################################################################
from DDSim.DD4hepSimulation import DD4hepSimulation
from g4units import m, mm, GeV, MeV, rad
import os

SIM = DD4hepSimulation()

## The compact XML file
SIM.compactFile = ""
## Lorentz boost for the crossing angle, in radian!
SIM.crossingAngleBoost = 7.e-3*rad
SIM.enableDetailedShowerMode = True
SIM.enableG4GPS = False
SIM.enableG4Gun = False
SIM.enableGun = False
## InputFiles for simulation .stdhep, .slcio, .HEPEvt, .hepevt, .hepmc, .pairs files are supported
SIM.inputFiles = []
## Macro file to execute for runType 'run' or 'vis'
SIM.macroFile = ""
## number of events to simulate, used in batch mode
SIM.numberOfEvents = 3
## Outputfile from the simulation,only lcio output is supported
SIM.outputFile = "dummyOutput.slcio"
## Physics list to use in simulation
SIM.physicsList = None
## Verbosity use integers from 1(most) to 7(least) verbose
## or strings: VERBOSE, DEBUG, INFO, WARNING, ERROR, FATAL, ALWAYS
SIM.printLevel = "INFO"
## The type of action to do in this invocation
## batch: just simulate some events, needs numberOfEvents, and input file or gun
## vis: enable visualisation, run the macroFile if it is set
## run: run the macroFile and exit
## shell: enable interactive session
SIM.runType = "batch"
## Skip first N events when reading a file
SIM.skipNEvents = 0
## Steering file to change default behaviour
SIM.steeringFile = None
## FourVector of translation for the Smearing of the Vertex position: x y z t
SIM.vertexOffset = [0.0, 0.0, 0.0, 0.0]
## FourVector of the Sigma for the Smearing of the Vertex position: x y z t
SIM.vertexSigma = [0.0, 0.0, 0.0, 0.0]


################################################################################
## Action holding sensitive detector actions
##   The default tracker and calorimeter actions can be set with
## 
##   >>> SIM = DD4hepSimulation()
##   >>> SIM.action.tracker = ('Geant4TrackerWeightedAction', {'HitPositionCombination': 2, 'CollectSingleDeposits': False})
##   >>> SIM.action.calo    = "Geant4CalorimeterAction"
## 
##   for specific subdetectors specific sensitive detectors can be set based on pattern matching
## 
##   >>> SIM = DD4hepSimulation()
##   >>> SIM.action.mapActions['tpc'] = "TPCSDAction"
## 
##   and additional parameters for the sensitive detectors can be set when the map is given a tuple
## 
##   >>> SIM = DD4hepSimulation()
##   >>> SIM.action.mapActions['ecal'] =( "CaloPreShowerSDAction", {"FirstLayerNumber": 1} )
## 
##    
################################################################################

##  set the default calorimeter action 
SIM.action.calo = "Geant4ScintillatorCalorimeterAction"

##  create a map of patterns and actions to be applied to sensitive detectors
##         example: SIM.action.mapActions['tpc'] = "TPCSDAction" 
SIM.action.mapActions = {}

SIM.action.mapActions['tpc'] = "TPCSDAction"

##  set the default tracker action 
SIM.action.tracker = ('Geant4TrackerWeightedAction', {'HitPositionCombination': 2, 'CollectSingleDeposits': False})


################################################################################
## Configuration for the magnetic field (stepper) 
################################################################################
## --- used in v01-19-05 :
SIM.field.delta_chord = 1e-05
SIM.field.delta_intersection = 1e-05
SIM.field.delta_one_step = .5e-03*mm
SIM.field.eps_max = 1e-04
SIM.field.eps_min = 1e-05
SIM.field.equation = "Mag_UsualEqRhs"
SIM.field.largest_step = 10.*m
SIM.field.min_chord_step = 1.e-2*mm
SIM.field.stepper = "HelixSimpleRunge"

## ---  default values in ddsim
##SIM.field.delta_chord = 0.25
##SIM.field.delta_intersection = 0.001
##SIM.field.delta_one_step = 0.01
##SIM.field.eps_max = 0.001
##SIM.field.eps_min = 5e-05
##SIM.field.equation = "Mag_UsualEqRhs"
##SIM.field.largest_step = 10000.0
##SIM.field.min_chord_step = 0.01
##SIM.field.stepper = "G4ClassicalRK4"

################################################################################
## Configuration for sensitive detector filters
## 
##   Set the default filter for tracker or caliromter
##   >>> SIM.filter.tracker = "edep1kev"
##   >>> SIM.filter.calo = ""
## 
##   Assign a filter to a sensitive detector via pattern matching
##   >>> SIM.filter.mapDetFilter['FTD'] = "edep1kev"
## 
##   Or more than one filter:
##   >>> SIM.filter.mapDetFilter['FTD'] = ["edep1kev", "geantino"]
## 
##   Don't use the default filter or anything else:
##   >>> SIM.filter.mapDetFilter['TPC'] = None ## or "" or []
## 
##   Create a custom filter. The dictionary is used to instantiate the filter later on
##   >>> SIM.filter.filters['edep3kev'] = dict(name="EnergyDepositMinimumCut/3keV", parameter={"Cut": 3.0*keV} )
## 
##    
################################################################################

##  default filter for calorimeter sensitive detectors; this is applied if no other filter is used for a calorimeter 
SIM.filter.calo = "edep0"

##  list of filter objects: map between name and parameter dictionary 
SIM.filter.filters = {'edep0': {'parameter': {'Cut': 0.0}, 'name': 'EnergyDepositMinimumCut/Cut0'}, 'geantino': {'parameter': {}, 'name': 'GeantinoRejectFilter/GeantinoRejector'}, 'edep1kev': {'parameter': {'Cut': 0.001}, 'name': 'EnergyDepositMinimumCut'}}

##  a map between patterns and filter objects, using patterns to attach filters to sensitive detector 
SIM.filter.mapDetFilter = {}

SIM.filter.mapDetFilter['TPC'] = None

##  default filter for tracking sensitive detectors; this is applied if no other filter is used for a tracker
SIM.filter.tracker = "edep1kev"


################################################################################
## Configuration for the GuineaPig InputFiles 
################################################################################

## Set the number of pair particles to simulate per event.
##     Only used if inputFile ends with ".pairs"
##     If "-1" all particles will be simulated in a single event
##     
SIM.guineapig.particlesPerEvent = "-1"


################################################################################
## Configuration for the DDG4 ParticleGun 
################################################################################

##  direction of the particle gun, 3 vector 
SIM.gun.direction = (0, 0, 1)

## choose the distribution of the random direction for theta
## 
##     Options for random distributions:
## 
##     'uniform' is the default distribution, flat in theta
##     'cos(theta)' is flat in cos(theta)
##     'eta', or 'pseudorapidity' is flat in pseudorapity
##     'ffbar' is distributed according to 1+cos^2(theta)
## 
##     Setting a distribution will set isotrop = True
##     
SIM.gun.distribution = None
SIM.gun.energy = 10000.0

##  isotropic distribution for the particle gun
## 
##     use the options phiMin, phiMax, thetaMin, and thetaMax to limit the range of randomly distributed directions
##     if one of these options is not None the random distribution will be set to True and cannot be turned off!
##     
SIM.gun.isotrop = False
SIM.gun.multiplicity = 1
SIM.gun.particle = "mu-"
SIM.gun.phiMax = None

## Minimal azimuthal angle for random distribution
SIM.gun.phiMin = None

##  position of the particle gun, 3 vector 
SIM.gun.position = (0.0, 0.0, 0.0)
SIM.gun.thetaMax = None
SIM.gun.thetaMin = None


################################################################################
## Configuration for the output levels of DDG4 components 
################################################################################

## Output level for input sources
SIM.output.inputStage = 3

## Output level for Geant4 kernel
SIM.output.kernel = 3

## Output level for ParticleHandler
SIM.output.part = 3

## Output level for Random Number Generator setup
SIM.output.random = 6


################################################################################
## Configuration for the Particle Handler/ MCTruth treatment 
################################################################################

## Enable lots of printout on simulated hits and MC-truth information
SIM.part.enableDetailedHitsAndParticleInfo = False

##  Keep all created particles 
SIM.part.keepAllParticles = False

## Minimal distance between particle vertex and endpoint of parent after
##     which the vertexIsNotEndpointOfParent flag is set
##     
SIM.part.minDistToParentVertex = 2.2e-14

## MinimalKineticEnergy to store particles created in the tracking region
SIM.part.minimalKineticEnergy = 1*MeV

##  Printout at End of Tracking 
SIM.part.printEndTracking = False

##  Printout at Start of Tracking 
SIM.part.printStartTracking = False

## List of processes to save, on command line give as whitespace separated string in quotation marks
SIM.part.saveProcesses = ['Decay']


################################################################################
## Configuration for the PhysicsList 
################################################################################
# this needs to be set to False if any standard physics list is used:
SIM.physics.decays = False
SIM.physics.list = "QGSP_BERT" # "FTFP_BERT"

##  location of particle.tbl file containing extra particles and their lifetime information
##     
SIM.physics.pdgfile = os.path.join( os.environ.get("DD4HEP"),  "DDG4/examples/particle.tbl")

##  The global geant4 rangecut for secondary production
## 
##     Default is 0.7 mm as is the case in geant4 10
## 
##     To disable this plugin and be absolutely sure to use the Geant4 default range cut use "None"
## 
##     Set printlevel to DEBUG to see a printout of all range cuts,
##     but this only works if range cut is not "None"
##     
SIM.physics.rangecut =  0.1*mm


################################################################################
## Properties for the random number generator 
################################################################################

## If True, calculate random seed for each event based on eventID and runID
## allows reproducibility even when SkippingEvents
SIM.random.enableEventSeed = False
SIM.random.file = None
SIM.random.luxury = 1
SIM.random.replace_gRandom = True
SIM.random.seed = None
SIM.random.type = None

Information that differs per run

# Keep only the columns whose number of distinct values is not exactly one.
run_df.loc[:, run_df.nunique().values != 1]
CommandLine DateUTC User WorkingDirectory _argv inputFiles outputFile random.seed steeringFile
0 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:41:59.159231 UTC ilcusr000 /var/lib/condor/execute/dir_25888/DIRAC_JCtiDd... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/var/lib/condor/execute/dir_25888/DIRAC_JCti... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 157824 /var/lib/condor/execute/dir_25888/DIRAC_JCtiDd...
1 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:43:00.420341 UTC ilc032 /pool/condor/dir_87689/DIRAC__LfCKVpilot/60176368 ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/pool/condor/dir_87689/DIRAC__LfCKVpilot/601... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578221 /pool/condor/dir_87689/DIRAC__LfCKVpilot/60176...
2 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:43:57.852468 UTC ilc033 /home/pool/condor/dir_10221/DIRAC_LMS78spilot/... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/home/pool/condor/dir_10221/DIRAC_LMS78spilo... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578229 /home/pool/condor/dir_10221/DIRAC_LMS78spilot/...
3 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:39:57.254905 UTC ilc045 /var/lib/condor/execute/dir_29408/DIRAC_VQPEfu... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/var/lib/condor/execute/dir_29408/DIRAC_VQPE... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 157829 /var/lib/condor/execute/dir_29408/DIRAC_VQPEfu...
4 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:42:11.717918 UTC ilc006 /scratch/condor_pool/condor/dir_3789/rJ0NDmECq... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/scratch/condor_pool/condor/dir_3789/rJ0NDmE... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578216 /scratch/condor_pool/condor/dir_3789/rJ0NDmECq...
... ... ... ... ... ... ... ... ... ...
67 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:42:46.151946 UTC ilc032 /pool/condor/dir_265147/DIRAC_bBbVv1pilot/6017... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/pool/condor/dir_265147/DIRAC_bBbVv1pilot/60... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578278 /pool/condor/dir_265147/DIRAC_bBbVv1pilot/6017...
68 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:42:28.066133 UTC ilc006 /scratch/condor_pool/condor/dir_53522/iZVKDmNC... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/scratch/condor_pool/condor/dir_53522/iZVKDm... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578231 /scratch/condor_pool/condor/dir_53522/iZVKDmNC...
69 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:43:05.691181 UTC ilc006 /scratch/condor_pool/condor/dir_181389/flnNDmY... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/scratch/condor_pool/condor/dir_181389/flnND... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578272 /scratch/condor_pool/condor/dir_181389/flnNDmY...
70 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:42:19.101171 UTC ilc032 /pool/condor/dir_12979/DIRAC_PM8x8fpilot/60176411 ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/pool/condor/dir_12979/DIRAC_PM8x8fpilot/601... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578264 /pool/condor/dir_12979/DIRAC_PM8x8fpilot/60176...
71 /cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v02... 2022-01-26 05:41:23.246551 UTC ilc041 /var/lib/condor/execute/dir_7589/DIRAC_zscCfsp... ['/cvmfs/ilc.desy.de/sw/x86_64_gcc82_centos7/v... ['/var/lib/condor/execute/dir_7589/DIRAC_zscCf... sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_ee... 1578255 /var/lib/condor/execute/dir_7589/DIRAC_zscCfsp...

72 rows × 9 columns

Example: All output file names

# Restrict to the per-run differing fields, then list every output file name.
varies_per_run = run_df.nunique().values != 1
differing_df = run_df.T[varies_per_run].T
for output_name in differing_df.outputFile.values:
    print(output_name)
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_001.d_sim_00015782_4.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_002.d_sim_00015782_21.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_003.d_sim_00015782_29.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_004.d_sim_00015782_9.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_005.d_sim_00015782_16.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_006.d_sim_00015782_1.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_007.d_sim_00015782_13.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_008.d_sim_00015782_3.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_009.d_sim_00015782_14.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_010.d_sim_00015782_6.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_011.d_sim_00015782_19.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_012.d_sim_00015782_20.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_013.d_sim_00015782_12.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_014.d_sim_00015782_15.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_015.d_sim_00015782_26.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_016.d_sim_00015782_25.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_017.d_sim_00015782_17.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_018.d_sim_00015782_28.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_019.d_sim_00015782_5.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_020.d_sim_00015782_11.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_021.d_sim_00015782_22.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_022.d_sim_00015782_8.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_023.d_sim_00015782_10.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_024.d_sim_00015782_24.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_025.d_sim_00015782_2.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_026.d_sim_00015782_69.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_027.d_sim_00015782_18.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_028.d_sim_00015782_7.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_029.d_sim_00015782_27.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_030.d_sim_00015782_67.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_031.d_sim_00015782_23.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_032.d_sim_00015782_40.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_033.d_sim_00015782_68.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_034.d_sim_00015782_45.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_035.d_sim_00015782_38.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_036.d_sim_00015782_58.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_037.d_sim_00015782_46.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_038.d_sim_00015782_47.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_039.d_sim_00015782_33.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_040.d_sim_00015782_44.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_041.d_sim_00015782_87.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_042.d_sim_00015782_54.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_043.d_sim_00015782_65.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_044.d_sim_00015782_66.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_045.d_sim_00015782_42.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_046.d_sim_00015782_76.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_047.d_sim_00015782_49.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_048.d_sim_00015782_71.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_049.d_sim_00015782_83.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_050.d_sim_00015782_81.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_051.d_sim_00015782_43.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_052.d_sim_00015782_75.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_053.d_sim_00015782_37.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_054.d_sim_00015782_36.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_055.d_sim_00015782_88.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_056.d_sim_00015782_53.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_057.d_sim_00015782_77.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_058.d_sim_00015782_59.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_059.d_sim_00015782_91.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_060.d_sim_00015782_30.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_061.d_sim_00015782_89.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_062.d_sim_00015782_35.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_063.d_sim_00015782_41.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_064.d_sim_00015782_80.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_065.d_sim_00015782_86.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_066.d_sim_00015782_39.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_067.d_sim_00015782_79.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_068.d_sim_00015782_78.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_069.d_sim_00015782_31.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_070.d_sim_00015782_72.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_071.d_sim_00015782_64.slcio
sv02-02.mILD_l5_v02.E250-SetA.I500002.P2f_z_eehiq.eL.pR.n000_072.d_sim_00015782_55.slcio

Event header

To keep the code fast, only a small max_events is used here. The values in the event header are nevertheless the same for every event in the whole file.

# Count the distinct values in each event-header column, then show the
# header table itself (indexed by EVENT).
# NOTE(review): `aj` is created earlier in the notebook, presumably an
# Anajob instance built with a small max_events -- confirm.
print(aj.event_header.nunique())
aj.event_header
RUN         1
DETECTOR    1
dtype: int64
RUN DETECTOR
EVENT
1 500002 ILD_l5_v02
2 500002 ILD_l5_v02
3 500002 ILD_l5_v02

Event information

# The per-event collection table is cached as CSV next to the raw anajob
# dump, so that the text only has to be parsed when no cache exists yet.
anajob_events_path = data_folder + "/anajob_events.csv"
try:
    ev_df = pd.read_csv(anajob_events_path, index_col=0)
except FileNotFoundError:
    # No cache yet: build the table from all events (max_events=-1).
    ev_df = Anajob(raw_txt, max_events=-1).df
    try:
        ev_df.to_csv(anajob_events_path)
    except OSError:
        # Writing the cache is best effort (e.g. read-only data folder).
        # Only file-system errors are tolerated here, so that
        # KeyboardInterrupt / SystemExit still propagate.
        print(
            f"WARNING: The dataframe could not be saved to {anajob_events_path}",
            file=sys.stderr,
        )
# Average number of elements per event, for each collection.
mean_n = ev_df.groupby("COLLECTION NAME")["NUMBER OF ELEMENTS"].mean()
# One row per collection: its name followed by its mean.
name_mean_pairs = np.array([mean_n.index.values, mean_n.values]).T.reshape(-1, 2)
_df = pd.DataFrame(
    name_mean_pairs,
    columns=["COLLECTION NAME", "mean NUMBER OF ELEMENTS"],
)
# Render as an HTML table without the integer row index.
HTML(_df.to_html(index=False))
COLLECTION NAME mean NUMBER OF ELEMENTS
BCalClusters 1.115112
BCalRecoParticle 1.115112
BuildUpVertex 0.008796
BuildUpVertex_RP 0.008796
BuildUpVertex_V0 0.003356
BuildUpVertex_V0_RP 0.003356
ClusterMCTruthLink 53.162593
DistilledPFOs 15.783866
GammaGammaCandidateEtaPrimes 0.02912
GammaGammaCandidateEtas 0.187222
GammaGammaCandidatePi0s 0.464792
GammaGammaParticles 0.386991
KinkRecoParticles 1.028953
KinkVertices 1.028953
MCParticlesSkimmed 144.973519
MCTruthClusterLink 53.162593
MCTruthMarlinTrkTracksLink 10.1775
MCTruthRecoLink 56.931435
MarlinTrkTracks 9.63463
MarlinTrkTracksKaon 9.63463
MarlinTrkTracksMCTruthLink 10.1775
MarlinTrkTracksProton 9.63463
PandoraClusters 13.326227
PandoraPFOs 16.170856
PrimaryVertex 1.0
PrimaryVertex_RP 1.0
ProngRecoParticles 1.019178
ProngVertices 1.019178
RecoMCTruthLink 56.931435
SplitRecoParticles 1.016854
SplitVertices 1.016854
V0RecoParticles 1.064981
V0Vertices 1.064981
# Show the full table: one row per (event, collection) pair.
ev_df
COLLECTION NAME COLLECTION TYPE NUMBER OF ELEMENTS EVENT
0 BuildUpVertex Vertex 0 1
1 BuildUpVertex_RP ReconstructedParticle 0 1
2 BuildUpVertex_V0 Vertex 0 1
3 BuildUpVertex_V0_RP ReconstructedParticle 0 1
4 ClusterMCTruthLink LCRelation 13 1
... ... ... ... ...
18 PandoraClusters Cluster 17 43200
19 PandoraPFOs ReconstructedParticle 25 43200
20 PrimaryVertex Vertex 1 43200
21 PrimaryVertex_RP ReconstructedParticle 1 43200
22 RecoMCTruthLink LCRelation 53 43200

1020784 rows × 4 columns

def plot_entries_per_collection(df):
    """Histogram the number of elements per event for each collection.

    Three views are drawn into a single figure:

    * main axes: normalized histogram of 0 to 50 elements in unit-width bins;
    * upper inset: normalized reverse-cumulative histogram of 0 to 15
      elements, drawn only for collections whose counts all lie below the
      last bin edge;
    * lower inset: normalized histogram over the full range of counts in
      bins of width 20, with a logarithmic y axis.

    Collections are drawn in order of decreasing total element count. The
    legend lists each collection with its mean count in parentheses.

    Args:
        df: DataFrame with the columns "COLLECTION NAME" and
            "NUMBER OF ELEMENTS".

    Returns:
        The matplotlib figure holding the three axes.
    """
    per_collection = df.groupby("COLLECTION NAME")["NUMBER OF ELEMENTS"]
    bins1 = np.arange(-0.5, 51, 1)
    bins2 = np.arange(-0.5, 16)
    wide_step = 20
    # Extend the stop value by a full step: np.arange excludes the stop, so
    # stopping at `max + 0.5` would usually leave the largest counts outside
    # the last bin and silently drop them from the histogram.
    bins3 = np.arange(-0.5, per_collection.max().max() + wide_step, wide_step)

    fig, ax = plt.subplots(figsize=(12, 9))
    axins1 = ax.inset_axes([0.30, 0.60, 0.69, 0.39], transform=ax.transAxes)
    axins2 = ax.inset_axes([0.30, 0.17, 0.69, 0.39], transform=ax.transAxes)

    kw = dict(histtype="step", linewidth=2, density=True)
    colors = list(mcolors.TABLEAU_COLORS)
    linestyles = ["-", ":", "--", "-."]
    ordered_names = per_collection.sum().sort_values(ascending=False).index
    for i, collection in enumerate(ordered_names):
        # Reuse the existing grouping instead of re-scanning the whole frame.
        x = per_collection.get_group(collection)
        # Cycle through the colors first, then move on to the next linestyle.
        # Both lookups wrap around, so any number of collections is accepted.
        kw["color"] = colors[i % len(colors)]
        kw["linestyle"] = linestyles[(i // len(colors)) % len(linestyles)]
        x.hist(bins=bins1, ax=ax, label=f"{collection} ({x.mean():.1f})", **kw)
        # The cumulative view is only meaningful if no entry is cut off.
        if (x < bins2[-1]).all():
            x.hist(bins=bins2, ax=axins1, cumulative=-1, **kw)
        x.hist(bins=bins3, ax=axins2, **kw)

    axins1.patch.set_alpha(0.8)
    axins2.patch.set_alpha(0.8)
    ax.set_ylabel("pdf")
    axins1.set_ylabel("cdf")
    axins2.set_ylabel("pdf")
    axins2.set_yscale("log")
    ax.legend(title="COLLECTION NAME", bbox_to_anchor=(1.0, 1.0))
    fig.tight_layout()
    return fig


fig = plot_entries_per_collection(ev_df)
../_images/anajob_20_0.png