Skip to content

Commit a001f6d

Browse files
committed
[rfile] add GetKeys() methods
1 parent cf0a710 commit a001f6d

File tree

3 files changed

+387
-28
lines changed

3 files changed

+387
-28
lines changed

io/io/inc/ROOT/RFile.hxx

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
#include <ROOT/RError.hxx>
1212

1313
#include <memory>
14+
#include <iostream>
1415
#include <string_view>
1516
#include <typeinfo>
1617

17-
class TFile;
18+
class TIterator;
1819
class TKey;
20+
class TFile;
1921

2022
namespace ROOT {
2123
namespace Experimental {
@@ -29,6 +31,93 @@ ROOT::RLogChannel &RFileLog();
2931

3032
} // namespace Internal
3133

34+
/**
\class ROOT::Experimental::RFileKeyInfo
\ingroup RFile
\brief Information about an RFile object's Key.

Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type
etc.
Querying this information can be done via RFile::GetKeys() or RFile::GetKeysNonRecursive(). Reading an object's Key
doesn't deserialize the full object, so it's a relatively lightweight operation.
*/
struct RFileKeyInfo {
   /// Full path of the object inside the file: the path of its directory and the key name, joined by '/'.
   std::string fName;
   /// Title stored in the key.
   std::string fTitle;
   /// Name of the class of the object the key refers to.
   std::string fClassName;
   /// Cycle number of the key. Zero-initialized so that a default-constructed RFileKeyInfo never carries an
   /// indeterminate value.
   std::uint16_t fCycle = 0;
};
50+
51+
/// The iterable returned by RFile::GetKeys() and RFile::GetKeysNonRecursive()
52+
class RFileKeyIterable final {
53+
using Pattern_t = std::string;
54+
55+
TFile *fFile;
56+
Pattern_t fPattern;
57+
std::uint32_t fFlags = 0;
58+
59+
public:
60+
enum EFlags {
61+
kNone = 0,
62+
kRecursive = 1 << 0,
63+
};
64+
65+
class RIterator {
66+
friend class RFileKeyIterable;
67+
68+
struct RIterStackElem {
69+
// This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class
70+
// to be copy-constructible.
71+
std::shared_ptr<TIterator> fIter;
72+
std::string fDirPath;
73+
74+
// Outlined to avoid including TIterator.h
75+
RIterStackElem(TIterator *it, const std::string &path = "");
76+
// Outlined to avoid including TIterator.h
77+
~RIterStackElem();
78+
79+
// fDirPath doesn't need to be compared because it's implied by fIter.
80+
bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; }
81+
};
82+
83+
std::vector<RIterStackElem> fIterStack;
84+
Pattern_t fPattern;
85+
const TKey *fCurKey = nullptr;
86+
std::uint16_t fRootDirNesting = 0;
87+
std::uint32_t fFlags = 0;
88+
89+
void Advance();
90+
91+
// NOTE: `iter` here is an owning pointer (or null)
92+
RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags);
93+
94+
public:
95+
using iterator = RIterator;
96+
using iterator_category = std::forward_iterator_tag;
97+
using difference_type = std::ptrdiff_t;
98+
using value_type = RFileKeyInfo;
99+
using pointer = const value_type *;
100+
using reference = const value_type &;
101+
102+
iterator &operator++()
103+
{
104+
Advance();
105+
return *this;
106+
}
107+
value_type operator*();
108+
bool operator!=(const iterator &rh) const { return !(*this == rh); }
109+
bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; }
110+
};
111+
112+
RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags)
113+
: fFile(file), fPattern(std::string(rootDir)), fFlags(flags)
114+
{
115+
}
116+
117+
RIterator begin() const;
118+
RIterator end() const;
119+
};
120+
32121
/**
33122
\class ROOT::Experimental::RFile
34123
\ingroup RFile
@@ -196,6 +285,31 @@ public:
196285

197286
/// Flushes the RFile if needed and closes it, disallowing any further reading or writing.
198287
void Close();
288+
289+
/// Returns an iterable over all paths of objects written into this RFile starting at path "rootPath".
290+
/// The returned paths are always "absolute" paths: they are not relative to `rootPath`.
291+
/// Keys relative to directories are not returned: only those relative to leaf objects are.
292+
/// If `rootPath` is the path of a leaf object, only `rootPath` itself will be returned.
293+
/// This recurses on all the subdirectories of `rootPath`. If you only want the immediate children of `rootPath`,
294+
/// use GetKeysNonRecursive().
295+
RFileKeyIterable GetKeys(std::string_view rootPath = "") const
{
   // Recursive listing: keys located in subdirectories of `rootPath` are visited as well.
   const std::uint32_t iterFlags = RFileKeyIterable::kRecursive;
   TFile *rawFile = fFile.get();
   return RFileKeyIterable(rawFile, rootPath, iterFlags);
}
299+
300+
/// Returns an iterable over all paths of objects written into this RFile contained in the directory "rootPath".
301+
/// The returned paths are always "absolute" paths: they are not relative to `rootPath`.
302+
/// Keys relative to directories are not returned: only those relative to leaf objects are.
303+
/// If `rootPath` is the path of a leaf object, only `rootPath` itself will be returned.
304+
/// This only returns the immediate children of `rootPath`. If you want to recurse into the subdirectories of
305+
/// `rootPath`, use GetKeys().
306+
RFileKeyIterable GetKeysNonRecursive(std::string_view rootPath = "") const
{
   // No flags set: the iteration is not recursive.
   const std::uint32_t iterFlags = RFileKeyIterable::kNone;
   TFile *rawFile = fFile.get();
   return RFileKeyIterable(rawFile, rootPath, iterFlags);
}
310+
311+
/// Prints the internal structure of this RFile to the given stream.
312+
void Print(std::ostream &out = std::cout) const;
199313
};
200314

201315
} // namespace Experimental

io/io/src/RFile.cxx

Lines changed: 125 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
#include <Byteswap.h>
1414
#include <TError.h>
1515
#include <TFile.h>
16+
#include <TIterator.h>
1617
#include <TKey.h>
18+
#include <TList.h>
1719
#include <TROOT.h>
1820

1921
#include <algorithm>
@@ -184,10 +186,6 @@ static std::string ValidateAndNormalizePath(std::string &path)
184186

185187
/////////////////////////////////////////////////////////////////////////////////////////////////
186188

187-
RFile::RFile(std::unique_ptr<TFile> file) : fFile(std::move(file)) {}
188-
189-
RFile::~RFile() = default;
190-
191189
std::unique_ptr<RFile> RFile::Open(std::string_view path)
192190
{
193191
CheckExtension(path);
@@ -227,6 +225,10 @@ std::unique_ptr<RFile> RFile::Recreate(std::string_view path)
227225
return rfile;
228226
}
229227

228+
// Takes over ownership of `file` by moving it into fFile.
RFile::RFile(std::unique_ptr<TFile> file)
   : fFile(std::move(file))
{
}

// Defaulted here, out of line, rather than in the header.
RFile::~RFile() = default;
231+
230232
TKey *RFile::GetTKey(std::string_view path) const
231233
{
232234
// In RFile, differently from TFile, when dealing with a path like "a/b/c", we always consider it to mean
@@ -373,6 +375,125 @@ void RFile::PutUntyped(std::string_view pathSV, const std::type_info &type, cons
373375
}
374376
}
375377

378+
// Wraps the raw pointer `it` into the shared_ptr member and records the path of the directory `it` iterates over.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::RIterStackElem(TIterator *it, const std::string &path)
   : fIter(std::shared_ptr<TIterator>(it)),
     fDirPath(path)
{
}

// Defaulted in this translation unit, where TIterator.h is included.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterStackElem::~RIterStackElem() = default;
384+
385+
// Builds an iterator over the keys produced by `iter` (an owning pointer, or null).
// With a null `iter` the stack stays empty, which is the state end() produces.
ROOT::Experimental::RFileKeyIterable::RIterator::RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags)
   : fPattern(pattern), fFlags(flags)
{
   if (!iter)
      return;

   fIterStack.emplace_back(iter);

   if (!fPattern.empty()) {
      // The nesting level of the root directory is the number of path components of the pattern.
      // The pattern may or may not end with '/': "dir" and "dir/" designate the same directory, so a missing
      // trailing separator accounts for one more component.
      const auto nSeparators = std::count(fPattern.begin(), fPattern.end(), '/');
      const bool hasTrailingSep = fPattern.back() == '/';
      fRootDirNesting = static_cast<std::uint16_t>(nSeparators + (hasTrailingSep ? 0 : 1));
   }

   // Position the iterator on the first key accepted by the pattern and flags, if there is one.
   Advance();
}
403+
404+
// Returns an iterator built from a fresh TIterator over the file's list of keys.
ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::begin() const
{
   // MakeIterator() hands out an owning pointer; the RIterator takes ownership of it.
   TIterator *topLevelIter = fFile->GetListOfKeys()->MakeIterator();
   return RIterator(topLevelIter, fPattern, fFlags);
}
408+
409+
// Returns the past-the-end iterator: it is built without a TIterator, hence with an empty iterator stack.
ROOT::Experimental::RFileKeyIterable::RIterator ROOT::Experimental::RFileKeyIterable::end() const
{
   TIterator *noIter = nullptr;
   return RIterator(noIter, fPattern, fFlags);
}
413+
414+
void ROOT::Experimental::RFileKeyIterable::RIterator::Advance()
415+
{
416+
fCurKey = nullptr;
417+
418+
const bool recursive = fFlags & kRecursive;
419+
420+
// We only want to return keys that refer to user objects, not internal ones, therefore we skip
421+
// all keys that have internal class names.
422+
while (!fIterStack.empty()) {
423+
auto &[iter, dirPath] = fIterStack.back();
424+
assert(iter);
425+
TObject *keyObj = iter->Next();
426+
if (!keyObj) {
427+
// reached end of the iteration
428+
fIterStack.pop_back();
429+
continue;
430+
}
431+
432+
assert(keyObj->IsA() == TClass::GetClass<TKey>());
433+
auto key = static_cast<TKey *>(keyObj);
434+
435+
const auto dirSep = (dirPath.empty() ? "" : "/");
436+
437+
if (strcmp(key->GetClassName(), "TDirectory") == 0 || strcmp(key->GetClassName(), "TDirectoryFile") == 0) {
438+
TDirectory *dir = key->ReadObject<TDirectory>();
439+
TIterator *innerIter = dir->GetListOfKeys()->MakeIterator();
440+
assert(innerIter);
441+
fIterStack.emplace_back(innerIter, dirPath + dirSep + dir->GetName());
442+
continue;
443+
}
444+
445+
// Reconstruct the full path of the key
446+
const auto &fullPath = dirPath + dirSep + key->GetName();
447+
const auto nesting = fIterStack.size() - 1;
448+
449+
// skip key if it's not a child of root dir
450+
if (!ROOT::StartsWith(fullPath, fPattern))
451+
continue;
452+
453+
// check that we are in the same directory as "rootDir".
454+
if (!recursive && nesting != fRootDirNesting)
455+
continue;
456+
457+
// All checks passed: return this key.
458+
assert(!fullPath.empty());
459+
fCurKey = key;
460+
break;
461+
}
462+
}
463+
464+
// Returns the information about the key the iterator currently points to.
// Throws ROOT::RException if the iterator is exhausted or has no current key.
ROOT::Experimental::RFileKeyInfo ROOT::Experimental::RFileKeyIterable::RIterator::operator*()
{
   if (fIterStack.empty() || !fCurKey)
      throw ROOT::RException(R__FAIL("tried to dereference an invalid iterator"));

   // The back of the stack holds the directory that contains the current key.
   const std::string &parentDir = fIterStack.back().fDirPath;

   RFileKeyInfo info;
   info.fName = parentDir;
   if (!parentDir.empty())
      info.fName += "/";
   info.fName += fCurKey->GetName();
   info.fTitle = fCurKey->GetTitle();
   info.fClassName = fCurKey->GetClassName();
   info.fCycle = fCurKey->GetCycle();
   return info;
}
482+
483+
void RFile::Print(std::ostream &out) const
484+
{
485+
std::vector<RFileKeyInfo> keys;
486+
auto keysIter = GetKeys();
487+
for (const auto &key : keysIter) {
488+
keys.emplace_back(key);
489+
}
490+
491+
std::sort(keys.begin(), keys.end(), [](const auto &a, const auto &b) { return a.fName < b.fName; });
492+
for (const auto &key : keys) {
493+
out << key.fClassName << " " << key.fName << ";" << key.fCycle << ": \"" << key.fTitle << "\"\n";
494+
}
495+
}
496+
376497
size_t RFile::Flush()
377498
{
378499
return fFile->Write();

0 commit comments

Comments
 (0)