Skip to content

Commit b8cb541

Browse files
committed
[rfile] add ListKeys() method
1 parent e6538e1 commit b8cb541

File tree

3 files changed

+527
-33
lines changed

3 files changed

+527
-33
lines changed

io/io/inc/ROOT/RFile.hxx

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
#include <ROOT/RError.hxx>
1212

1313
#include <memory>
14+
#include <iostream>
1415
#include <string_view>
1516
#include <typeinfo>
1617

1718
class TFile;
19+
class TIterator;
1820
class TKey;
1921

2022
namespace ROOT {
@@ -29,6 +31,121 @@ ROOT::RLogChannel &RFileLog();
2931

3032
} // namespace Internal
3133

34+
/// Given a "path-like" string (like foo/bar/baz), returns a pair `{ dirName, baseName }`.
35+
/// `baseName` will be empty if the string ends with '/'.
36+
/// `dirName` will be empty if the string contains no '/'.
37+
/// `dirName`, if not empty, always ends with a '/'.
38+
/// NOTE: this function does no semantic checking or path expansion, nor does it interact with the
39+
/// filesystem in any way (so it won't follow symlinks or anything like that).
40+
/// Moreover it doesn't trim the path in any way, so any leading or trailing whitespace will be preserved.
41+
/// This function does not perform any copy: the returned string_views have the same lifetime as `path`.
42+
std::pair<std::string_view, std::string_view> DecomposePath(std::string_view path);
43+
44+
class RFileKeyIterable;
45+
46+
/**
47+
\class ROOT::Experimental::RKeyInfo
48+
\ingroup RFile
49+
\brief Information about an RFile object's Key.
50+
51+
Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type
52+
etc.
53+
Querying this information can be done via RFile::ListKeys(). Reading an object's Key
54+
doesn't deserialize the full object, so it's a relatively lightweight operation.
55+
*/
56+
class RKeyInfo final {
57+
friend class ROOT::Experimental::RFileKeyIterable; // grants RFileKeyIterable access to the private fields below
58+
59+
public:
60+
enum class ECategory : std::uint16_t { // kind of entry this key describes
61+
kInvalid, // value held by a default-initialized RKeyInfo (see fCategory)
62+
kObject, // presumably a non-directory object -- TODO confirm against the implementation
63+
kDirectory // presumably a directory -- TODO confirm against the implementation
64+
};
65+
66+
private:
67+
std::string fPath; // directory part plus object name, as returned by GetPath()
68+
std::string fTitle;
69+
std::string fClassName;
70+
std::uint16_t fCycle = 0;
71+
ECategory fCategory = ECategory::kInvalid;
72+
73+
public:
74+
/// Returns the absolute path of this key, i.e. the directory part plus the object name.
75+
const std::string &GetPath() const { return fPath; }
76+
/// Returns the base name of this key, i.e. the name of the object without the directory part.
77+
std::string GetBaseName() const { return std::string(DecomposePath(fPath).second); } // copies: DecomposePath yields a view into fPath
78+
const std::string &GetTitle() const { return fTitle; } ///< Returns the title stored for this key.
79+
const std::string &GetClassName() const { return fClassName; } ///< Returns the class name stored for this key.
80+
std::uint16_t GetCycle() const { return fCycle; } ///< Returns the cycle number stored for this key.
81+
ECategory GetCategory() const { return fCategory; } ///< Returns the category of this key; kInvalid if never set.
82+
};
83+
84+
/// The iterable returned by RFile::ListKeys()
85+
class RFileKeyIterable final {
86+
using Pattern_t = std::string;
87+
88+
TFile *fFile = nullptr; // raw pointer stored as given; NOTE(review): presumably the file must outlive this iterable -- confirm
89+
Pattern_t fPattern; // copy of the constructor's `rootDir` argument
90+
std::uint32_t fFlags = 0; // listing-mode bitmask passed to the constructor
91+
92+
public:
93+
class RIterator { // iterator type returned by begin() / end(); dereferences to an RKeyInfo
94+
friend class RFileKeyIterable; // only RFileKeyIterable can reach the private constructor
95+
96+
struct RIterStackElem { // one entry of the iterator stack: a TIterator plus a directory path
97+
// This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class
98+
// to be copy-constructible.
99+
std::shared_ptr<TIterator> fIter;
100+
std::string fDirPath;
101+
102+
// Outlined to avoid including TIterator.h
103+
RIterStackElem(TIterator *it, const std::string &path = "");
104+
// Outlined to avoid including TIterator.h
105+
~RIterStackElem();
106+
107+
// fDirPath doesn't need to be compared because it's implied by fIter.
108+
bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; } // compares the held pointers, not the pointees
109+
};
110+
111+
std::vector<RIterStackElem> fIterStack; // NOTE(review): <vector> is not among the visible includes -- confirm it is included
112+
Pattern_t fPattern;
113+
const TKey *fCurKey = nullptr;
114+
std::uint16_t fRootDirNesting = 0;
115+
std::uint32_t fFlags = 0;
116+
117+
void Advance(); // defined out of line; called by operator++
118+
119+
// NOTE: `iter` here is an owning pointer (or null)
120+
RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags);
121+
122+
public:
123+
using iterator = RIterator;
124+
using iterator_category = std::input_iterator_tag; // NOTE(review): <iterator> is not among the visible includes -- confirm
125+
using difference_type = std::ptrdiff_t;
126+
using value_type = RKeyInfo;
127+
using pointer = const value_type *;
128+
using reference = const value_type &;
129+
130+
iterator &operator++() // pre-increment only; no post-increment overload is declared in this class
131+
{
132+
Advance();
133+
return *this;
134+
}
135+
value_type operator*(); // returns an RKeyInfo by value, even though `reference` is declared as const value_type &
136+
bool operator!=(const iterator &rh) const { return !(*this == rh); }
137+
bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; } // equal iff the iterator stacks compare equal element-wise
138+
};
139+
140+
RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags) // only stores its arguments; rootDir is copied into fPattern
141+
: fFile(file), fPattern(std::string(rootDir)), fFlags(flags)
142+
{
143+
}
144+
145+
RIterator begin() const;
146+
RIterator end() const;
147+
};
148+
32149
/**
33150
\class ROOT::Experimental::RFile
34151
\ingroup RFile
@@ -68,7 +185,7 @@ Even though there is no equivalent of TDirectory in the RFile API, directories a
68185
(since they are a concept in the ROOT binary format). However they are for now only interacted with indirectly, via the
69186
use of filesystem-like string-based paths. If you Put an object in an RFile under the path "path/to/object", "object"
70187
will be stored under directory "to" which is in turn stored under directory "path". This hierarchy is encoded in the
71-
ROOT file itself and it can provide some optimization and/or conveniencies when querying objects.
188+
ROOT file itself and it can provide some optimization and/or conveniences when querying objects.
72189
73190
For the most part, it is convenient to think about RFile in terms of a key-value storage where string-based paths are
74191
used to refer to arbitrary objects. However, given the hierarchical nature of ROOT files, certain filesystem-like
@@ -126,6 +243,12 @@ class RFile final {
126243
TKey *GetTKey(std::string_view path) const;
127244

128245
public:
246+
enum EListKeyFlags { // bitmask values accepted by the `flags` argument of ListKeys()
247+
kListObjects = 1 << 0, // include keys of non-directory objects
248+
kListDirs = 1 << 1, // include keys of directory objects
249+
kListRecursive = 1 << 2, // recurse into all subdirectories instead of listing only immediate children
250+
};
251+
129252
// This is arbitrary, but it's useful to avoid pathological cases
130253
static constexpr int kMaxPathNesting = 1000;
131254

@@ -196,6 +319,39 @@ public:
196319

197320
/// Flushes the RFile if needed and closes it, disallowing any further reading or writing.
198321
void Close();
322+
323+
/// Returns an iterable over all keys of objects and/or directories written into this RFile starting at path
324+
/// `rootPath` (defaulting to include the content of all subdirectories).
325+
/// By default, keys relative to directories are not returned: only those relative to leaf objects are.
326+
/// If `rootPath` is the path of a leaf object, only `rootPath` itself will be returned.
327+
/// `flags` is a bitmask specifying the listing mode.
328+
/// If `(flags & kListObjects) != 0`, the listing will include keys of non-directory objects (default);
329+
/// If `(flags & kListDirs) != 0`, the listing will include keys of directory objects;
330+
/// If `(flags & kListRecursive) != 0`, the listing will recurse on all subdirectories of `rootPath` (default),
331+
/// otherwise it will only list immediate children of `rootPath`.
332+
///
333+
/// Example usage:
334+
/// ~~~{.cpp}
335+
/// for (auto keyInfo : file->ListKeys()) {
336+
/// /* iterate over all objects in the RFile */
337+
/// }
338+
/// for (auto keyInfo : file->ListKeys("", RFile::kListDirs | RFile::kListObjects | RFile::kListRecursive)) {
339+
/// /* iterate over all objects and directories in the RFile */
340+
/// }
341+
/// for (auto keyInfo : file->ListKeys("a/b", RFile::kListObjects)) {
342+
/// /* iterate over all objects that are immediate children of directory "a/b" */
343+
/// }
344+
/// for (auto keyInfo : file->ListKeys("foo", RFile::kListDirs | RFile::kListRecursive)) {
345+
/// /* iterate over all directories under directory "foo", recursively */
346+
/// }
347+
/// ~~~
348+
RFileKeyIterable ListKeys(std::string_view rootPath = "", std::uint32_t flags = kListObjects | kListRecursive) const
349+
{
350+
return RFileKeyIterable(fFile.get(), rootPath, flags); // only builds the iterable: its constructor just stores the file pointer, a copy of rootPath and the flags
351+
}
352+
353+
/// Prints the internal structure of this RFile to the given stream.
354+
void Print(std::ostream &out = std::cout) const;
199355
};
200356

201357
} // namespace Experimental

0 commit comments

Comments
 (0)