LDMX Software
EventFile.cxx
1#include <regex.h>
2
3#include <ctime>
4
5#include "TTreeReader.h"
6
7// LDMX
8#include "Framework/Event.h"
9#include "Framework/EventFile.h"
10#include "Framework/Exception/Exception.h"
11#include "Framework/RunHeader.h"
12
13namespace framework {
14
16 const std::string& filename, EventFile* parent,
17 bool is_output_file, bool is_single_output,
18 bool is_loopable)
19 : file_name_(filename),
20 is_output_file_(is_output_file),
21 is_single_output_(is_single_output),
22 is_loopable_(is_loopable),
23 parent_(parent) {
24 if (is_output_file_) {
25 // we are writing out, so open the file and make sure it is writable
26 file_ = new TFile(file_name_.c_str(), "RECREATE");
27 if (!file_->IsOpen() or !file_->IsWritable()) {
28 EXCEPTION_RAISE("FileError",
29 "Output file '" + file_name_ + "' is not writable.");
30 }
31
32 // set compression settings
33 // See the TFile constructor for an explanation of how this integer is
34 // built. Short reference: setting = 100 * algorithm + level, where
35 // algorithm = 0 ==> use global default.
36 file_->SetCompressionSettings(params.get<int>("compression_setting", 9));
37
38 if (parent_) {
39 // output file when there are input files
40 // might be drop/keep rules, so we should have these rules to make sure
41 // it works
42
43 // turn everything on
44 // hypothetically could turn everything off? Doesn't work for some
45 // reason?
46 pre_clone_rules_.emplace_back("*", true);
47
48 // except EventHeader (copies over to output)
49 pre_clone_rules_.emplace_back("EventHeader*", true);
50
51 // reactivate all branches so default behavior is drop
52 reactivate_rules_.push_back("*");
53 }
54 } else {
55 // open file with only reading enabled
56 file_ = new TFile(file_name_.c_str());
57 // double check that file is open
58 if (!file_->IsOpen()) {
59 EXCEPTION_RAISE("FileError", "Input file '" + file_name_ +
60 "' is not readable or does not exist.");
61 }
62
63 bool skip_corrupted = params.get<bool>("skip_corrupted_input_files", false);
64
65 // make sure file is not a zombie file
66 // (i.e. process ended without closing or the file was corrupted some other
67 // way)
68 if (file_->IsZombie()) {
69 if (not skip_corrupted) {
70 EXCEPTION_RAISE("FileError", "Input file '" + file_name_ +
71 "' is corrupted. Framework will not "
72 "attempt to recover this file.");
73 }
74 return;
75 }
76
77 // Get the tree name from the configuration
78 auto tree_name{params.get<std::string>("tree_name")};
79 tree_ = static_cast<TTree*>(file_->Get(tree_name.c_str()));
80 if (!tree_) {
81 if (not skip_corrupted) {
82 EXCEPTION_RAISE("FileError", "File '" + file_name_ +
83 "' does not have a TTree named '" +
84 tree_name + "' in it.");
85 }
86 return;
87 }
88 entries_ = tree_->GetEntriesFast();
89 }
90
92}
93
95 const std::string& filename, bool is_loopable)
96 : EventFile(params, filename, nullptr, false, false, is_loopable) {}
97
99 const std::string& filename)
100 : EventFile(params, filename, nullptr, false, false, false) {}
101
103 const std::string& filename, EventFile* parent,
104 bool is_single_output)
105 : EventFile(params, filename, parent, true, is_single_output, false) {}
106
108 // Before an output file, the Event tree needs to be written.
109 if (tree_ && is_output_file_) {
110 // make sure we are in output file before writing
111 file_->cd();
112 tree_->Write();
113 file_->Close();
114 }
115}
116
118 if (is_output_file_) return file_->IsZombie();
119 return (!tree_ or file_->IsZombie() or file_->GetNkeys() == 0);
120}
121
122void EventFile::addDrop(const std::string& rule) {
123 int offset;
124 bool is_keep = false, is_drop = false, is_ignore = false;
125 // keywords must appear at the start of the rule string
126 if (rule.find("keep") == 0) {
127 offset = 4;
128 is_keep = true;
129 } else if (rule.find("drop") == 0) {
130 offset = 4;
131 is_drop = true;
132 } else if (rule.find("ignore") == 0) {
133 offset = 6;
134 is_ignore = true;
135 }
136
137 // none of (keep,drop,ignore) was provided => not valid rule
138 if (int(is_keep) + int(is_drop) + int(is_ignore) != 1) return;
139
140 std::string srule = rule.substr(offset);
141 size_t i;
142 for (i = srule.find_first_of(" \t\n\r"); i != std::string::npos;
143 i = srule.find_first_of(" \t\n\r"))
144 srule.erase(i, 1);
145
146 // name of branch is not given
147 if (srule.length() == 0) return;
148
149 // add wild card at end for matching purposes
150 if (srule.back() != '*') srule += ".*"; // add wildcard to back
151
152 // Guard: EventHeader must never be dropped or ignored
153 if (is_drop or is_ignore) {
154 regex_t guard_reg;
155 if (regcomp(&guard_reg, srule.c_str(),
156 REG_EXTENDED | REG_ICASE | REG_NOSUB) == 0) {
157 bool matches_event_header =
158 (regexec(&guard_reg, ldmx::EventHeader::BRANCH.c_str(), 0, 0, 0) ==
159 0);
160 regfree(&guard_reg);
161 if (matches_event_header) {
162 EXCEPTION_RAISE("BadRule",
163 "Drop/ignore rule '" + rule +
164 "' would affect EventHeader which is required by "
165 "the framework and cannot be removed.");
166 }
167 }
168 }
169
170 if (is_keep) {
171 // turn both the input and output tree's on
172 // root needs . removed otherwise it gets cranky
173 srule.erase(std::remove(srule.begin(), srule.end(), '.'), srule.end());
174 pre_clone_rules_.emplace_back(srule, true);
175 // this branch will then be copied over into output tree and be active
176 } else if (is_ignore) {
177 // don't even read it from the input file
178 // pass regex (with dots) to event bus so setInputTree skips these branches
179 event_->addIgnore(srule); // requires event_ to be set
180 // root needs . removed otherwise it gets cranky
181 srule.erase(std::remove(srule.begin(), srule.end(), '.'), srule.end());
182 // warn if this rule drops all collections
183 if (srule == "*")
184 ldmx_log(fatal) << "Ignore rule '" << rule
185 << "' will hide all input collections from processors.";
186 pre_clone_rules_.emplace_back(srule, false);
187 // these branches won't be copied over into output tree
188 } else if (is_drop) {
189 // drop means allowing it on reading but not writing
190 // pass these regex to event bus so Event::add knows
191 event_->addDrop(srule); // requires event_ to be set
192
193 // root needs . removed otherwise it gets cranky
194 srule.erase(std::remove(srule.begin(), srule.end(), '.'), srule.end());
195 // warn if this rule drops all collections
196 if (srule == "*")
197 ldmx_log(fatal) << "Drop rule '" << rule
198 << "' will drop all collections from the output file.";
199 pre_clone_rules_.emplace_back(srule, false);
200 // these branches won't be copied over into output tree
201 // reactivate input branch after clone
202 reactivate_rules_.push_back(srule);
203 }
204}
205
206bool EventFile::nextEvent(bool storeCurrentEvent) {
207 if (ientry_ < 0) {
208 // first entry of this file
209 if (parent_) {
210 // we have a parent file
211 if (!parent_->tree_) {
212 // this should _never_ happen
213 EXCEPTION_RAISE("EventFile", "No event tree in the file");
214 }
215 // Only clone parent tree if either
216 // 1) There is no tree setup yet (first input file)
217 // 2) This is not single output (new input file --> new output file)
218 if (!tree_ or !is_single_output_) {
219 // clones parent_->tree_ to our tree_ keeping drop/keep rules in mind
220 // clone tree (only copies over branches that are active on input tree)
221
222 file_->cd(); // go into output file
223
224 for (auto const& rule_pair : pre_clone_rules_)
225 parent_->tree_->SetBranchStatus(rule_pair.first.c_str(),
226 rule_pair.second);
227
228 tree_ = parent_->tree_->CloneTree(0);
229
230 // reactivate any drop branches (drop) on input tree
231 for (auto const& rule : reactivate_rules_)
232 parent_->tree_->SetBranchStatus(rule.c_str(), 1);
233 }
236 } // we have a parent file
237 } else {
238 // later than first entry of file
239 if (is_output_file_) {
241 if (storeCurrentEvent) // we should store before moving on
242 tree_->Fill(); // fill the clones...
243 } // we are an output file
244
245 // the event bus may not be defined
246 // for this file if we are input file and
247 // there is an output file during this run
248 if (event_) {
249 event_->clear();
251 } // event bus defined
252 } // first or not first entry in this file
253
254 if (parent_) {
255 // we have a parent, follow their lead
256 if (!parent_->nextEvent()) {
257 return false;
258 }
260 entries_++;
261 } else if (is_output_file_) {
262 // we don't have a parent and we
263 // are an output file
264 // Just increment the number of entries
265 // and the index_ of the current entry
266 ientry_++;
267 entries_++;
268 } else {
269 // we don't have a parent and
270 // we aren't an output file
271 // try to load another entry from our tree
272 if (ientry_ + 1 >= entries_) {
273 if (is_loopable_) {
274 // reset the event counter: reuse events from start of pileup tree
275 ientry_ = -1;
276 } else
277 return false;
278 }
279 ientry_++;
280 tree_->GetEntry(ientry_);
281 }
282
283 // if we have an event_
284 // make sure it is iterated as well
285 return event_ ? event_->nextEvent() : true;
286}
287
289 event_ = evt;
290 if (is_output_file_) {
291 // we are an output file
292 if (!tree_ && !parent_) {
293 // we don't have a tree and we don't have a parent
294 // ==> *Production Mode* create a new tree
296 ientry_ = 0;
297 entries_ = 0;
298 }
299
300 if (parent_) {
301 // we have a parent file so give
302 // the parent's tree to the event bus
303 // as the input tree
305 }
306
307 // give our tree to the event as the output tree
309 } else {
310 // we are an input file
311 // so give our tree to the event as input tree
313 } // output or input file
314}
315
316int EventFile::skipToEvent(int offset) {
317 // make sure the event number exists
318 ientry_ = offset % entries_ - 1;
319 return ientry_;
320}
321
323 parent_ = parent;
324
325 // we can assume parent_->tree_ is valid
326 // because (for input files) the tree_ is imported
327 // from the file and then checked if its valid in the
328 // EventFile constructor
329
330 // Enter output file
331 file_->cd();
332
333 // need to turn on/off the same branches as in the initial setup...
334 for (auto const& rule_pair : pre_clone_rules_)
335 parent_->tree_->SetBranchStatus(rule_pair.first.c_str(), rule_pair.second);
336
337 // Copy over addresses from the new parent
338 parent_->tree_->CopyAddresses(tree_);
339
340 // and reactivate any dropping rules
341 for (auto const& rule : reactivate_rules_)
342 parent_->tree_->SetBranchStatus(rule.c_str(), 1);
343
344 // Reset the entry index_ with the new parent index_
346
347 // import run headers from new input file
349
350 return;
351}
352
354 if (not is_output_file_) {
355 EXCEPTION_RAISE("MisCall",
356 "Cannot write the run tree on an input event file.");
357 }
358
359 // store the run map into the output tree
360 // Check for the existence of the run tree in the file.
361 // If it already exists, throw an exception.
362 // TODO: Tree name shouldn't be hardcoded. Is this check really necessary?
363 auto run_tree{static_cast<TTree*>(file_->Get("LDMX_Run"))};
364 if (run_tree) {
365 EXCEPTION_RAISE("RunTree",
366 "RunTree 'LDMX_Run' already exists in output file '" +
367 file_name_ + "'.");
368 }
369
380 file_->cd();
381 run_tree = new TTree("LDMX_Run", "LDMX run header");
382
383 // create the branch on this tree
384 ldmx::RunHeader* the_handle = nullptr;
385 run_tree->Branch("RunHeader", "ldmx::RunHeader", &the_handle, 32000, 3);
386
387 // copy over the run headers into the tree
388 for (auto& [num, header_pair] : run_map_) {
389 the_handle = header_pair.second;
390 run_tree->Fill();
391 if (header_pair.first) delete header_pair.second;
392 }
393
394 run_tree->Write();
395}
396
398 int run_number = run_header.getRunNumber();
399
400 if (run_map_.find(run_number) != run_map_.end()) {
401 EXCEPTION_RAISE("RunMap", "Run map already contains a run with number '" +
402 std::to_string(run_number) + "'.");
403 }
404
405 run_map_[run_number] = std::make_pair(false, &run_header);
406
407 return;
408}
409
411 if (run_map_.find(run_number) != run_map_.end()) {
412 return run_map_.at(run_number).second;
413 }
414 return nullptr;
415}
416
418 ldmx::RunHeader* rh{this->getRunHeaderPtr(run_number)};
419 if (rh != nullptr) {
420 return *rh;
421 }
422 EXCEPTION_RAISE("RunHeader", "Unable to find header for run " +
423 std::to_string(run_number));
424}
425
427 // choose which file to import from
428 auto the_import_file{file_}; // if this is an input file
430 the_import_file = parent_->file_; // output file with input parent
431 else if (is_output_file_)
432 return; // output file, no input parent to read from
433
434 if (the_import_file) {
435 // the file exist
436 TTreeReader old_run_tree("LDMX_Run", the_import_file);
437 TTreeReaderValue<ldmx::RunHeader> old_run_header(old_run_tree, "RunHeader");
438 // TODO check that setup went correctly
439 while (old_run_tree.Next()) {
440 auto* old_run_header_ptr = old_run_header.Get();
441 if (old_run_header_ptr != nullptr) {
442 // copy input run tree into run map
443 // We should consider moving to a shared_ptr instead of 'new'
444 run_map_[old_run_header_ptr->getRunNumber()] =
445 std::make_pair(true, new ldmx::RunHeader(*old_run_header_ptr));
446 }
447 }
448 }
449
450 return;
451}
452} // namespace framework
Class implementing an event buffer system for storing event data.
This class manages all ROOT file input/output operations.
Definition EventFile.h:27
void updateParent(EventFile *parent)
Change pointer to different parent file.
TFile * file_
The backing TFile for this EventFile.
Definition EventFile.h:300
void addDrop(const std::string &rule)
Add a rule for dropping collections from the output.
void setupEvent(Event *evt)
Set an Event object containing the event data to work with this file.
Long64_t entries_
The number of entries in the tree.
Definition EventFile.h:282
void writeRunTree()
Write the map of run headers to the file as a TTree of RunHeader.
bool nextEvent(bool storeCurrentEvent=true)
Prepare the next event.
ldmx::RunHeader * getRunHeaderPtr(int runNumber)
Update the RunHeader for a given run, if it exists in the input file.
Long64_t ientry_
The current entry in the tree.
Definition EventFile.h:285
std::vector< std::pair< std::string, bool > > pre_clone_rules_
Pre-clone rules.
Definition EventFile.h:317
void writeRunHeader(ldmx::RunHeader &runHeader)
Write the run header into the run map.
bool is_loopable_
True if this is an input file with pileup overlay events.
Definition EventFile.h:297
~EventFile()
Destructor.
void importRunHeaders()
Fill the internal map of run numbers to RunHeader objects from the input file.
int skipToEvent(int offset)
Skip events using an offset.
std::map< int, std::pair< bool, ldmx::RunHeader * > > run_map_
Map of run numbers to RunHeader objects.
Definition EventFile.h:338
std::string file_name_
The file name.
Definition EventFile.h:288
ldmx::RunHeader & getRunHeader(int runNumber)
Get the RunHeader for a given run, if it exists in the input file.
bool is_single_output_
True if there is only one output file.
Definition EventFile.h:294
EventFile(const framework::config::Parameters &params, const std::string &filename, EventFile *parent, bool isOutputFile, bool isSingleOutput, bool isLoopable)
Constructor to make a general file.
Definition EventFile.cxx:15
EventFile * parent_
A parent file containing event data.
Definition EventFile.h:306
std::vector< std::string > reactivate_rules_
Vector of drop rules that have been parsed and need to be used to reactivate these branches on the in...
Definition EventFile.h:326
bool is_output_file_
True if file is an output file being written to disk.
Definition EventFile.h:291
TTree * tree_
The tree with event data.
Definition EventFile.h:303
Event * event_
The object containing the actual event data (trees and branches).
Definition EventFile.h:309
bool isCorrupted() const
Check if the file we have is corrupted.
Implements an event buffer system for storing event data.
Definition Event.h:42
void addIgnore(const std::string &exp)
Add an ignore rule to the list of regex expressions to ignore on input.
Definition Event.cxx:34
void clear()
Clear this object's data (including passengers).
Definition Event.cxx:179
TTree * createTree()
Create the output data tree.
Definition Event.cxx:115
void setOutputTree(TTree *tree)
Set the output data tree.
Definition Event.cxx:121
void beforeFill()
Action to be executed before the tree is filled.
Definition Event.cxx:170
void onEndOfEvent()
Perform end of event action (doesn't do anything right now).
Definition Event.cxx:184
bool nextEvent()
Go to the next event by retrieving the event header.
Definition Event.cxx:165
void setInputTree(TTree *tree)
Set the input data tree.
Definition Event.cxx:123
void addDrop(const std::string &exp)
Add a drop rule to the list of regex expressions to drop.
Definition Event.cxx:24
Class encapsulating parameters for configuring a processor.
Definition Parameters.h:29
const T & get(const std::string &name) const
Retrieve the parameter of the given name.
Definition Parameters.h:78
static const std::string BRANCH
Name of EventHeader branch.
Definition EventHeader.h:49
Run-specific configuration and data stored in its own output TTree alongside the event TTree in the o...
Definition RunHeader.h:57
int getRunNumber() const
Definition RunHeader.h:77
All classes in the ldmx-sw project use this namespace.