@@ -57,6 +57,10 @@ inline const char *nodeTypeToString(node_type NodeType) {
57
57
return " host_task" ;
58
58
case node_type::native_command:
59
59
return " native_command" ;
60
+ case node_type::async_malloc:
61
+ return " async_malloc" ;
62
+ case node_type::async_free:
63
+ return " async_free" ;
60
64
}
61
65
assert (false && " Unhandled node type" );
62
66
return {};
@@ -340,7 +344,7 @@ graph_impl::graph_impl(const sycl::context &SyclContext,
340
344
const sycl::device &SyclDevice,
341
345
const sycl::property_list &PropList)
342
346
: MContext(SyclContext), MDevice(SyclDevice), MRecordingQueues(),
343
- MEventsMap (), MInorderQueueMap(),
347
+ MEventsMap (), MInorderQueueMap(), MGraphMemPool(SyclContext, SyclDevice),
344
348
MID(NextAvailableID.fetch_add(1 , std::memory_order_relaxed)) {
345
349
checkGraphPropertiesAndThrow (PropList);
346
350
if (PropList.has_property <property::graph::no_cycle_check>()) {
@@ -752,12 +756,12 @@ void graph_impl::beginRecording(
752
756
}
753
757
}
754
758
755
- // Check if nodes are empty and if so loop back through predecessors until we
756
- // find the real dependency.
759
+ // Check if nodes do not require enqueueing and if so loop back through
760
+ // predecessors until we find the real dependency.
757
761
void exec_graph_impl::findRealDeps (
758
762
std::vector<ur_exp_command_buffer_sync_point_t > &Deps,
759
763
std::shared_ptr<node_impl> CurrentNode, int ReferencePartitionNum) {
760
- if (CurrentNode->isEmpty ()) {
764
+ if (! CurrentNode->requiresEnqueue ()) {
761
765
for (auto &N : CurrentNode->MPredecessors ) {
762
766
auto NodeImpl = N.lock ();
763
767
findRealDeps (Deps, NodeImpl, ReferencePartitionNum);
@@ -877,9 +881,9 @@ void exec_graph_impl::createCommandBuffers(
877
881
Partition->MCommandBuffers [Device] = OutCommandBuffer;
878
882
879
883
for (const auto &Node : Partition->MSchedule ) {
880
- // Empty nodes are not processed as other nodes, but only their
884
+ // Some nodes are not scheduled like other nodes, and only their
881
885
// dependencies are propagated in findRealDeps
882
- if (Node->isEmpty ())
886
+ if (! Node->requiresEnqueue ())
883
887
continue ;
884
888
885
889
sycl::detail::CGType type = Node->MCGType ;
@@ -945,6 +949,8 @@ exec_graph_impl::exec_graph_impl(sycl::context Context,
945
949
946
950
exec_graph_impl::~exec_graph_impl () {
947
951
try {
952
+ MGraphImpl->markExecGraphDestroyed ();
953
+
948
954
const sycl::detail::AdapterPtr &Adapter =
949
955
sycl::detail::getSyclObjImpl (MContext)->getAdapter ();
950
956
MSchedule.clear ();
@@ -954,6 +960,9 @@ exec_graph_impl::~exec_graph_impl() {
954
960
Event->wait (Event);
955
961
}
956
962
963
+ // Clean up any graph-owned allocations that were allocated
964
+ MGraphImpl->getMemPool ().deallocateAndUnmapAll ();
965
+
957
966
for (const auto &Partition : MPartitions) {
958
967
Partition->MSchedule .clear ();
959
968
for (const auto &Iter : Partition->MCommandBuffers ) {
@@ -1812,6 +1821,14 @@ modifiable_command_graph::finalize(const sycl::property_list &PropList) const {
1812
1821
// Graph is read and written in this scope so we lock
1813
1822
// this graph with full priviledges.
1814
1823
graph_impl::WriteLock Lock (impl->MMutex );
1824
+ // If the graph uses graph-owned allocations and an executable graph already
1825
+ // exists we must throw an error.
1826
+ if (impl->getMemPool ().hasAllocations () && impl->getExecGraphCount () > 0 ) {
1827
+ throw sycl::exception (sycl::make_error_code (errc::invalid),
1828
+ " Graphs containing allocations can only have a "
1829
+ " single executable graph alive at any one time." );
1830
+ }
1831
+
1815
1832
return command_graph<graph_state::executable>{
1816
1833
this ->impl , this ->impl ->getContext (), PropList};
1817
1834
}
@@ -1939,11 +1956,16 @@ executable_command_graph::executable_command_graph(
1939
1956
const property_list &PropList)
1940
1957
: impl(std::make_shared<detail::exec_graph_impl>(Ctx, Graph, PropList)) {
1941
1958
finalizeImpl (); // Create backend representation for executable graph
1959
+ // Mark that we have created an executable graph from the modifiable graph.
1960
+ Graph->markExecGraphCreated ();
1942
1961
}
1943
1962
1944
1963
void executable_command_graph::finalizeImpl () {
1945
1964
impl->makePartitions ();
1946
1965
1966
+ // Handle any work required for graph-owned memory allocations
1967
+ impl->finalizeMemoryAllocations ();
1968
+
1947
1969
auto Device = impl->getGraphImpl ()->getDevice ();
1948
1970
for (auto Partition : impl->getPartitions ()) {
1949
1971
if (!Partition->isHostTask ()) {
@@ -1971,6 +1993,13 @@ void executable_command_graph::update(const std::vector<node> &Nodes) {
1971
1993
impl->update (NodeImpls);
1972
1994
}
1973
1995
1996
+ size_t executable_command_graph::get_required_mem_size () const {
1997
+ // Since each graph has a unique mem pool, return the current memory usage for
1998
+ // now. This call my change if we move to being able to share memory between
1999
+ // unique graphs.
2000
+ return impl->getGraphImpl ()->getMemPool ().getMemUseCurrent ();
2001
+ }
2002
+
1974
2003
dynamic_parameter_base::dynamic_parameter_base (
1975
2004
command_graph<graph_state::modifiable> Graph)
1976
2005
: impl(std::make_shared<dynamic_parameter_impl>(
0 commit comments