LDMX Software
EventFile.cxx
1#include <regex.h>
2
3#include <ctime>
4
5#include "TTreeReader.h"
6
7// LDMX
8#include "Framework/Event.h"
9#include "Framework/EventFile.h"
10#include "Framework/Exception/Exception.h"
11#include "Framework/RunHeader.h"
12
13namespace framework {
14
16 const std::string &filename, EventFile *parent,
17 bool is_output_file, bool is_single_output,
18 bool is_loopable)
19 : file_name_(filename),
20 is_output_file_(is_output_file),
21 is_single_output_(is_single_output),
22 is_loopable_(is_loopable),
23 parent_(parent) {
24 if (is_output_file_) {
25 // we are writing out, so open the file and make sure it is writable
26 file_ = new TFile(file_name_.c_str(), "RECREATE");
27 if (!file_->IsOpen() or !file_->IsWritable()) {
28 EXCEPTION_RAISE("FileError",
29 "Output file '" + file_name_ + "' is not writable.");
30 }
31
32 // set compression settings
33 // Check out the TFile constructor for an explanation of how this integer is
34 // built. Short reference: setting = 100*algorithm + level;
35 // algorithm = 0 ==> use global default.
36 file_->SetCompressionSettings(params.get<int>("compression_setting", 9));
37
38 if (parent_) {
39 // output file when there are input files
40 // might be drop/keep rules, so we should have these rules to make sure
41 // it works
42
43 // turn everything on
44 // hypothetically could turn everything off? Doesn't work for some
45 // reason?
46 pre_clone_rules_.emplace_back("*", true);
47
48 // except EventHeader (copies over to output)
49 pre_clone_rules_.emplace_back("EventHeader*", true);
50
51 // reactivate all branches so default behavior is drop
52 reactivate_rules_.push_back("*");
53 }
54 } else {
55 // open file with only reading enabled
56 file_ = new TFile(file_name_.c_str());
57 // double check that file is open
58 if (!file_->IsOpen()) {
59 EXCEPTION_RAISE("FileError", "Input file '" + file_name_ +
60 "' is not readable or does not exist.");
61 }
62
63 bool skip_corrupted = params.get<bool>("skip_corrupted_input_files", false);
64
65 // make sure file is not a zombie file
66 // (i.e. process ended without closing or the file was corrupted some other
67 // way)
68 if (file_->IsZombie()) {
69 if (not skip_corrupted) {
70 EXCEPTION_RAISE("FileError", "Input file '" + file_name_ +
71 "' is corrupted. Framework will not "
72 "attempt to recover this file.");
73 }
74 return;
75 }
76
77 // Get the tree name from the configuration
78 auto tree_name{params.get<std::string>("tree_name")};
79 tree_ = static_cast<TTree *>(file_->Get(tree_name.c_str()));
80 if (!tree_) {
81 if (not skip_corrupted) {
82 EXCEPTION_RAISE("FileError", "File '" + file_name_ +
83 "' does not have a TTree named '" +
84 tree_name + "' in it.");
85 }
86 return;
87 }
88 entries_ = tree_->GetEntriesFast();
89 }
90
92}
93
95 const std::string &filename, bool is_loopable)
96 : EventFile(params, filename, nullptr, false, false, is_loopable) {}
97
99 const std::string &filename)
100 : EventFile(params, filename, nullptr, false, false, false) {}
101
103 const std::string &filename, EventFile *parent,
104 bool is_single_output)
105 : EventFile(params, filename, parent, true, is_single_output, false) {}
106
108 // Before an output file, the Event tree needs to be written.
109 if (is_output_file_) {
110 // make sure we are in output file before writing
111 file_->cd();
112 tree_->Write();
113 }
114
115 // Close the file
116 file_->Close();
117}
118
120 if (is_output_file_) return file_->IsZombie();
121 return (!tree_ or file_->IsZombie() or file_->GetNkeys() == 0);
122}
123
124void EventFile::addDrop(const std::string &rule) {
125 int offset;
126 bool is_keep = false, is_drop = false, is_ignore = false;
127 // keywords must appear at the start of the rule string
128 if (rule.find("keep") == 0) {
129 offset = 4;
130 is_keep = true;
131 } else if (rule.find("drop") == 0) {
132 offset = 4;
133 is_drop = true;
134 } else if (rule.find("ignore") == 0) {
135 offset = 6;
136 is_ignore = true;
137 }
138
139 // none of (keep,drop,ignore) was provided => not valid rule
140 if (int(is_keep) + int(is_drop) + int(is_ignore) != 1) return;
141
142 std::string srule = rule.substr(offset);
143 size_t i;
144 for (i = srule.find_first_of(" \t\n\r"); i != std::string::npos;
145 i = srule.find_first_of(" \t\n\r"))
146 srule.erase(i, 1);
147
148 // name of branch is not given
149 if (srule.length() == 0) return;
150
151 // add wild card at end for matching purposes
152 if (srule.back() != '*') srule += ".*"; // add wildcard to back
153
154 // Guard: EventHeader must never be dropped or ignored
155 if (is_drop or is_ignore) {
156 regex_t guard_reg;
157 if (regcomp(&guard_reg, srule.c_str(),
158 REG_EXTENDED | REG_ICASE | REG_NOSUB) == 0) {
159 bool matches_event_header =
160 (regexec(&guard_reg, ldmx::EventHeader::BRANCH.c_str(), 0, 0, 0) ==
161 0);
162 regfree(&guard_reg);
163 if (matches_event_header) {
164 EXCEPTION_RAISE("BadRule",
165 "Drop/ignore rule '" + rule +
166 "' would affect EventHeader which is required by "
167 "the framework and cannot be removed.");
168 }
169 }
170 }
171
172 if (is_keep) {
173 // turn both the input and output tree's on
174 // root needs . removed otherwise it gets cranky
175 srule.erase(std::remove(srule.begin(), srule.end(), '.'), srule.end());
176 pre_clone_rules_.emplace_back(srule, true);
177 // this branch will then be copied over into output tree and be active
178 } else if (is_ignore) {
179 // don't even read it from the input file
180 // pass regex (with dots) to event bus so setInputTree skips these branches
181 event_->addIgnore(srule); // requires event_ to be set
182 // root needs . removed otherwise it gets cranky
183 srule.erase(std::remove(srule.begin(), srule.end(), '.'), srule.end());
184 // warn if this rule drops all collections
185 if (srule == "*")
186 ldmx_log(fatal) << "Ignore rule '" << rule
187 << "' will hide all input collections from processors.";
188 pre_clone_rules_.emplace_back(srule, false);
189 // these branches won't be copied over into output tree
190 } else if (is_drop) {
191 // drop means allowing it on reading but not writing
192 // pass these regex to event bus so Event::add knows
193 event_->addDrop(srule); // requires event_ to be set
194
195 // root needs . removed otherwise it gets cranky
196 srule.erase(std::remove(srule.begin(), srule.end(), '.'), srule.end());
197 // warn if this rule drops all collections
198 if (srule == "*")
199 ldmx_log(fatal) << "Drop rule '" << rule
200 << "' will drop all collections from the output file.";
201 pre_clone_rules_.emplace_back(srule, false);
202 // these branches won't be copied over into output tree
203 // reactivate input branch after clone
204 reactivate_rules_.push_back(srule);
205 }
206}
207
/**
 * Advance this file to its next event.
 *
 * On the first call for a file (ientry_ < 0) that has a parent, the parent's
 * tree is checked and, if this file has no tree yet or is not in
 * single-output mode, cloned into this file. pre_clone_rules_ are applied to
 * the parent tree before the clone and reactivate_rules_ are switched back
 * on afterwards.
 *
 * On later calls an output file fills its tree when storeCurrentEvent is
 * true, and the event bus (if one is attached) is cleared.
 *
 * The entry is then advanced in one of three ways:
 *  - with a parent: the call is delegated to parent_->nextEvent();
 *  - output file without parent: ientry_ and entries_ are incremented;
 *  - input file: the next tree entry is loaded, wrapping to the start when
 *    is_loopable_ is set and the end has been reached.
 *
 * @param storeCurrentEvent whether an output file should fill its tree with
 *        the current event before moving on
 * @return false if the parent has no further event or a non-loopable input
 *         file is exhausted; otherwise the result of event_->nextEvent()
 *         when an event bus is attached, or true when none is
 *
 * @throws Exception ("EventFile") if the parent file has no event tree.
 *
 * NOTE(review): the embedded listing numbers below skip (235->238, 241->243,
 * 251->253, 260->262), so some statements are probably absent from this
 * listing -- confirm against the repository before changing this function.
 */
208bool EventFile::nextEvent(bool storeCurrentEvent) {
209 if (ientry_ < 0) {
210 // first entry of this file
211 if (parent_) {
212 // we have a parent file
213 if (!parent_->tree_) {
214 // this should _never_ happen
215 EXCEPTION_RAISE("EventFile", "No event tree in the file");
216 }
217 // Only clone parent tree if either
218 // 1) There is no tree setup yet (first input file)
219 // 2) This is not single output (new input file --> new output file)
220 if (!tree_ or !is_single_output_) {
221 // clones parent_->tree_ to our tree_ keeping drop/keep rules in mind
222 // clone tree (only copies over branches that are active on input tree)
223
224 file_->cd(); // go into output file
225
// apply every (pattern, on/off) pair to the parent tree before cloning
226 for (auto const &rule_pair : pre_clone_rules_)
227 parent_->tree_->SetBranchStatus(rule_pair.first.c_str(),
228 rule_pair.second);
229
230 tree_ = parent_->tree_->CloneTree(0);
231
232 // reactivate any drop branches (drop) on input tree
233 for (auto const &rule : reactivate_rules_)
234 parent_->tree_->SetBranchStatus(rule.c_str(), 1);
235 }
238 } // we have a parent file
239 } else {
240 // later than first entry of file
241 if (is_output_file_) {
243 if (storeCurrentEvent) // we should store before moving on
244 tree_->Fill(); // fill the clones...
245 } // we are an output file
246
247 // the event bus may not be defined
248 // for this file if we are input file and
249 // there is an output file during this run
250 if (event_) {
251 event_->clear();
253 } // event bus defined
254 } // first or not first entry in this file
255
256 if (parent_) {
257 // we have a parent, follow their lead
258 if (!parent_->nextEvent()) {
259 return false;
260 }
262 entries_++;
263 } else if (is_output_file_) {
264 // we don't have a parent and we
265 // are an output file
266 // Just increment the number of entries
267 // and the index_ of the current entry
268 ientry_++;
269 entries_++;
270 } else {
271 // we don't have a parent and
272 // we aren't an output file
273 // try to load another entry from our tree
// the next index would be past the last entry: wrap around or stop
274 if (ientry_ + 1 >= entries_) {
275 if (is_loopable_) {
276 // reset the event counter: reuse events from start of pileup tree
277 ientry_ = -1;
278 } else
279 return false;
280 }
281 ientry_++;
282 tree_->GetEntry(ientry_);
283 }
284
285 // if we have an event_
286 // make sure it is iterated as well
287 return event_ ? event_->nextEvent() : true;
288}
289
291 event_ = evt;
292 if (is_output_file_) {
293 // we are an output file
294 if (!tree_ && !parent_) {
295 // we don't have a tree and we don't have a parent
296 // ==> *Production Mode* create a new tree
298 ientry_ = 0;
299 entries_ = 0;
300 }
301
302 if (parent_) {
303 // we have a parent file so give
304 // the parent's tree to the event bus
305 // as the input tree
307 }
308
309 // give our tree to the event as the output tree
311 } else {
312 // we are an input file
313 // so give our tree to the event as input tree
315 } // output or input file
316}
317
318int EventFile::skipToEvent(int offset) {
319 // make sure the event number exists
320 ientry_ = offset % entries_ - 1;
321 return ientry_;
322}
323
325 parent_ = parent;
326
327 // we can assume parent_->tree_ is valid
328 // because (for input files) the tree_ is imported
329 // from the file and then checked if its valid in the
330 // EventFile constructor
331
332 // Enter output file
333 file_->cd();
334
335 // need to turn on/off the same branches as in the initial setup...
336 for (auto const &rule_pair : pre_clone_rules_)
337 parent_->tree_->SetBranchStatus(rule_pair.first.c_str(), rule_pair.second);
338
339 // Copy over addresses from the new parent
340 parent_->tree_->CopyAddresses(tree_);
341
342 // and reactivate any dropping rules
343 for (auto const &rule : reactivate_rules_)
344 parent_->tree_->SetBranchStatus(rule.c_str(), 1);
345
346 // Reset the entry index_ with the new parent index_
348
349 // import run headers from new input file
351
352 return;
353}
354
356 if (not is_output_file_) {
357 EXCEPTION_RAISE("MisCall",
358 "Cannot write the run tree on an input event file.");
359 }
360
361 // store the run map into the output tree
362 // Check for the existence of the run tree in the file.
363 // If it already exists, throw an exception.
364 // TODO: Tree name shouldn't be hardcoded. Is this check really necessary?
365 auto run_tree{static_cast<TTree *>(file_->Get("LDMX_Run"))};
366 if (run_tree) {
367 EXCEPTION_RAISE("RunTree",
368 "RunTree 'LDMX_Run' already exists in output file '" +
369 file_name_ + "'.");
370 }
371
382 file_->cd();
383 run_tree = new TTree("LDMX_Run", "LDMX run header");
384
385 // create the branch on this tree
386 ldmx::RunHeader *the_handle = nullptr;
387 run_tree->Branch("RunHeader", "ldmx::RunHeader", &the_handle, 32000, 3);
388
389 // copy over the run headers into the tree
390 for (auto &[num, header_pair] : run_map_) {
391 the_handle = header_pair.second;
392 run_tree->Fill();
393 if (header_pair.first) delete header_pair.second;
394 }
395
396 run_tree->Write();
397}
398
400 int run_number = run_header.getRunNumber();
401
402 if (run_map_.find(run_number) != run_map_.end()) {
403 EXCEPTION_RAISE("RunMap", "Run map already contains a run with number '" +
404 std::to_string(run_number) + "'.");
405 }
406
407 run_map_[run_number] = std::make_pair(false, &run_header);
408
409 return;
410}
411
413 if (run_map_.find(run_number) != run_map_.end()) {
414 return run_map_.at(run_number).second;
415 }
416 return nullptr;
417}
418
420 ldmx::RunHeader *rh{this->getRunHeaderPtr(run_number)};
421 if (rh != nullptr) {
422 return *rh;
423 }
424 EXCEPTION_RAISE("RunHeader", "Unable to find header for run " +
425 std::to_string(run_number));
426}
427
429 // choose which file to import from
430 auto the_import_file{file_}; // if this is an input file
432 the_import_file = parent_->file_; // output file with input parent
433 else if (is_output_file_)
434 return; // output file, no input parent to read from
435
436 if (the_import_file) {
437 // the file exist
438 TTreeReader old_run_tree("LDMX_Run", the_import_file);
439 TTreeReaderValue<ldmx::RunHeader> old_run_header(old_run_tree, "RunHeader");
440 // TODO check that setup went correctly
441 while (old_run_tree.Next()) {
442 auto *old_run_header_ptr = old_run_header.Get();
443 if (old_run_header_ptr != nullptr) {
444 // copy input run tree into run map
445 // We should consider moving to a shared_ptr instead of 'new'
446 run_map_[old_run_header_ptr->getRunNumber()] =
447 std::make_pair(true, new ldmx::RunHeader(*old_run_header_ptr));
448 }
449 }
450 }
451
452 return;
453}
454} // namespace framework
Class implementing an event buffer system for storing event data.
This class manages all ROOT file input/output operations.
Definition EventFile.h:27
void updateParent(EventFile *parent)
Change pointer to different parent file.
TFile * file_
The backing TFile for this EventFile.
Definition EventFile.h:300
void addDrop(const std::string &rule)
Add a rule for dropping collections from the output.
void setupEvent(Event *evt)
Set an Event object containing the event data to work with this file.
std::map< int, std::pair< bool, ldmx::RunHeader * > > run_map_
Map of run numbers to RunHeader objects.
Definition EventFile.h:338
Long64_t entries_
The number of entries in the tree.
Definition EventFile.h:282
void writeRunTree()
Write the map of run headers to the file as a TTree of RunHeader.
bool nextEvent(bool storeCurrentEvent=true)
Prepare the next event.
ldmx::RunHeader * getRunHeaderPtr(int runNumber)
Get a pointer to the RunHeader for a given run, or nullptr if that run is not in the run map.
Long64_t ientry_
The current entry in the tree.
Definition EventFile.h:285
std::vector< std::pair< std::string, bool > > pre_clone_rules_
Pre-clone rules.
Definition EventFile.h:317
void writeRunHeader(ldmx::RunHeader &runHeader)
Write the run header into the run map.
bool is_loopable_
True if this is an input file with pileup overlay events.
Definition EventFile.h:297
~EventFile()
Destructor.
void importRunHeaders()
Fill the internal map of run numbers to RunHeader objects from the input file.
int skipToEvent(int offset)
Skip events using an offset.
std::string file_name_
The file name.
Definition EventFile.h:288
ldmx::RunHeader & getRunHeader(int runNumber)
Get the RunHeader for a given run, if it exists in the input file.
bool is_single_output_
True if there is only one output file.
Definition EventFile.h:294
EventFile(const framework::config::Parameters &params, const std::string &filename, EventFile *parent, bool isOutputFile, bool isSingleOutput, bool isLoopable)
Constructor to make a general file.
Definition EventFile.cxx:15
EventFile * parent_
A parent file containing event data.
Definition EventFile.h:306
std::vector< std::string > reactivate_rules_
Vector of drop rules that have been parsed and need to be used to reactivate these branches on the input tree.
Definition EventFile.h:326
bool is_output_file_
True if file is an output file being written to disk.
Definition EventFile.h:291
TTree * tree_
The tree with event data.
Definition EventFile.h:303
Event * event_
The object containing the actual event data (trees and branches).
Definition EventFile.h:309
bool isCorrupted() const
Check if the file we have is corrupted.
Implements an event buffer system for storing event data.
Definition Event.h:42
void addIgnore(const std::string &exp)
Add an ignore rule to the list of regex expressions to ignore on input.
Definition Event.cxx:34
void clear()
Clear this object's data (including passengers).
Definition Event.cxx:179
TTree * createTree()
Create the output data tree.
Definition Event.cxx:115
void setOutputTree(TTree *tree)
Set the output data tree.
Definition Event.cxx:121
void beforeFill()
Action to be executed before the tree is filled.
Definition Event.cxx:170
void onEndOfEvent()
Perform end of event action (doesn't do anything right now).
Definition Event.cxx:184
bool nextEvent()
Go to the next event by retrieving the event header.
Definition Event.cxx:165
void setInputTree(TTree *tree)
Set the input data tree.
Definition Event.cxx:123
void addDrop(const std::string &exp)
Add a drop rule to the list of regex expressions to drop.
Definition Event.cxx:24
Class encapsulating parameters for configuring a processor.
Definition Parameters.h:29
const T & get(const std::string &name) const
Retrieve the parameter of the given name.
Definition Parameters.h:78
static const std::string BRANCH
Name of EventHeader branch.
Definition EventHeader.h:49
Run-specific configuration and data stored in its own output TTree alongside the event TTree in the o...
Definition RunHeader.h:57
int getRunNumber() const
Definition RunHeader.h:77
All classes in the ldmx-sw project use this namespace.