From 130a9084913722bf024e6ab1fe1ea65d6ee08ccc Mon Sep 17 00:00:00 2001 From: Wendy Liang Date: Thu, 14 Mar 2019 10:52:39 -0700 Subject: [PATCH 01/24] examples: linux: zynqmp: Add IPI uio based API implementation Add IPI uio based API implementation. Signed-off-by: Wendy Liang --- .../linux/zynqmp/zynqmp_amp_demo/ipi-uio.c | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c new file mode 100644 index 00000000..c807b803 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_latency_demo.c + * This demo measures the IPI latency between the APU and RPU. + * This demo does the following steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5. Reset the APU to RPU TTC counter and then kick IPI to notify the + * remote. + * 6. When it receives IPI interrupt, the IPI interrupt handler to stop + * the RPU to APU TTC counter. + * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +struct ipi_channel { + struct metal_device *ipi_dev; /**< ipi metal device */ + struct metal_io_region *ipi_io; /**< ipi metal I/O region */ + int ipi_irq; /**< ipi irq id */ + uint32_t ipi_mask; /**< remote IPI mask */ + metal_irq_handler ipi_kick_cb; /**< IPI kick callback */ + void *ipi_kick_priv; /**< IPI kick callback private data */ +}; + +static struct ipi_channel ipi_chnl; + +/** + * @brief ipi_irq_handler() - IPI interrupt handler + * It will clear the notified flag to mark it's got an IPI interrupt. + * It will stop the RPU->APU timer and will clear the notified + * flag to mark it's got an IPI interrupt + * + * @param[in] vect_id - IPI interrupt vector ID + * @param[in/out] priv - communication channel data for this application. + * + * @return - If the IPI interrupt is triggered by its remote, it returns + * METAL_IRQ_HANDLED. It returns METAL_IRQ_NOT_HANDLED, if it is + * not the interrupt it expected. 
+ * + */ +static int _ipi_irq_handler (int vect_id, void *priv) +{ + uint32_t val; + struct ipi_channel *chnl = (struct ipi_channel *)priv; + struct metal_io_region *io; + + (void)vect_id; + + io = chnl->ipi_io; + val = metal_io_read32(io, IPI_ISR_OFFSET); + if (val & chnl->ipi_mask) { + if (chnl->ipi_kick_cb != NULL) + chnl->ipi_kick_cb(vect_id, chnl->ipi_kick_priv); + metal_io_write32(io, IPI_ISR_OFFSET, + chnl->ipi_mask); + return METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +static void _enable_ipi_intr(struct ipi_channel *chnl) +{ + metal_irq_enable(chnl->ipi_irq); + /* Enable IPI interrupt */ + metal_io_write32(chnl->ipi_io, IPI_IER_OFFSET, chnl->ipi_mask); +} + +static void _disable_ipi_intr(struct ipi_channel *chnl) +{ + /* disable IPI interrupt */ + metal_io_write32(chnl->ipi_io, IPI_IDR_OFFSET, chnl->ipi_mask); + metal_irq_disable(ipi_chnl.ipi_irq); +} + +void ipi_kick_register_handler(metal_irq_handler hd, void *priv) +{ + ipi_chnl.ipi_kick_cb = hd; + ipi_chnl.ipi_kick_priv = priv; +} +int init_ipi(void) +{ + struct metal_device *dev; + struct metal_io_region *io; + int ret; + + /* Open IPI device */ + ret = metal_device_open(BUS_NAME, IPI_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", IPI_DEV_NAME); + return ret; + } + + /* Get IPI device IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + metal_device_close(dev); + return ret; + } + ipi_chnl.ipi_dev = dev; + ipi_chnl.ipi_io = io; + + /* Get the IPI IRQ from the opened IPI device */ + ipi_chnl.ipi_irq = (intptr_t)dev->irq_info; + + ipi_chnl.ipi_mask = IPI_MASK; + /* disable IPI interrupt */ + _disable_ipi_intr(&ipi_chnl); + /* clear old IPI interrupt */ + metal_io_write32(io, IPI_ISR_OFFSET, IPI_MASK); + /* Register IPI irq handler */ + metal_irq_register(ipi_chnl.ipi_irq, _ipi_irq_handler, &ipi_chnl); + return 0; +} + +void deinit_ipi(void) +{ + /* disable IPI interrupt */ + 
_disable_ipi_intr(&ipi_chnl); + /* unregister IPI irq handler by setting the handler to 0 */ + metal_irq_unregister(ipi_chnl.ipi_irq); + if (ipi_chnl.ipi_dev) { + metal_device_close(ipi_chnl.ipi_dev); + ipi_chnl.ipi_dev = NULL; + } +} + +void kick_ipi(void *msg) +{ + (void)msg; + metal_io_write32(ipi_chnl.ipi_io, IPI_TRIG_OFFSET, ipi_chnl.ipi_mask); +} + +void disable_ipi_kick(void) +{ + _disable_ipi_intr(&ipi_chnl); +} +void enable_ipi_kick(void) +{ + _enable_ipi_intr(&ipi_chnl); +} From 82f7603b6ad755049e16fb18ddf28d6f2f7c8510 Mon Sep 17 00:00:00 2001 From: Wendy Liang Date: Mon, 18 Mar 2019 08:49:21 -0700 Subject: [PATCH 02/24] examples: linux: add IPI with R5 rproc kick sysfs implementation Add IPI mailbox IPI implementation with the remoteproc kick sysfs API. Signed-off-by: Wendy Liang --- .../linux/zynqmp/zynqmp_amp_demo/ipi-mb.c | 251 ++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c new file mode 100644 index 00000000..11c952cf --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_latency_demo.c + * This demo measures the IPI latency between the APU and RPU. + * This demo does the following steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5. Reset the APU to RPU TTC counter and then kick IPI to notify the + * remote. + * 6. When it receives IPI interrupt, the IPI interrupt handler to stop + * the RPU to APU TTC counter.
+ * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define RPROC_SYSFS "/sys/class/remoteproc/remoteproc0" +#define RPROC_SYSFS_KICK RPROC_SYSFS"/kick" +#define RPROC_SYSFS_REMOTE_KICK RPROC_SYSFS"/remote_kick" + +struct ipi_channel { + char *kickf; /**< IPI kick sysfs path */ + char *rkickf; /**< IPI remote kick sysfs path */ + int rkick_fd; /**< IPI remote kick file descriptor */ + atomic_int ipi_enabled; /**< flag to indicate if IPI is enabled */ + atomic_int ipi_handling_wakeup; /**< IPI handling wakeup */ + metal_irq_handler ipi_kick_cb; /**< IPI kick callback */ + void *ipi_kick_priv; /**< IPI kick callback private data */ +}; + +static struct ipi_channel ipi_chnl; +static pthread_t ipi_pthread; +static int ipi_handling_stop_fd; + +/** + * @brief ipi_kick_handling() - IPI kick handling + * + * It will listening to the remote kick sysfs file. + * It will call the registered handler if there is a kick from the + * remote. + * + * @param[in] args - argument of pthread. + * @return NULL. 
+ * + */ +static void *_ipi_kick_handling (void *args) +{ + int rkick_fd; + sigset_t set; + + (void)args; + /* unblock all signals */ + sigfillset(&set); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); + rkick_fd = ipi_chnl.rkick_fd; + while(1) { + char val[2] = {0, 0}; + int ret; + + lseek(rkick_fd, 0, SEEK_SET); + ret = read(rkick_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to read %s.\n", ipi_chnl.rkickf); + return NULL; + } + if (val[0] == '1') { + if (atomic_load(&ipi_chnl.ipi_enabled) == 0) { + int v; + + v = atomic_load(&ipi_chnl.ipi_handling_wakeup); + syscall(SYS_futex, + &ipi_chnl.ipi_handling_wakeup, + FUTEX_WAIT, v, NULL, NULL, 0); + } + if (ipi_chnl.ipi_kick_cb) { + ipi_chnl.ipi_kick_cb(rkick_fd, + ipi_chnl.ipi_kick_priv); + } + lseek(rkick_fd, 0, SEEK_SET); + ret = write(rkick_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to write %s.\n", + ipi_chnl.rkickf); + return NULL; + } + } else { + struct pollfd fds[2]; + + fds[0].fd = rkick_fd; + fds[0].events = POLLPRI; + fds[1].fd = ipi_handling_stop_fd; + fds[1].events = POLLIN; + ret = poll(fds, 2, -1); + if (ret == 0) { + LPERROR("polling %s times out.\n", + ipi_chnl.rkickf); + return NULL; + } else if (fds[1].revents & POLLIN) { + return NULL; + + } else if (ret < 0) { + LPERROR("polling %s failed, %s.\n", + ipi_chnl.rkickf, strerror(ret)); + return NULL; + } + } + } +} + +static void _enable_ipi_intr(struct ipi_channel *chnl) +{ + int exp = 0; + + atomic_compare_exchange_strong(&chnl->ipi_enabled, &exp, 1); + if (exp == 0) { + atomic_fetch_add(&chnl->ipi_handling_wakeup, 1); + syscall(SYS_futex, &chnl->ipi_handling_wakeup, FUTEX_WAKE, + 1, NULL, NULL, 0); + } +} + +static void _disable_ipi_intr(struct ipi_channel *chnl) +{ + atomic_store(&chnl->ipi_enabled, 0); +} + +void ipi_kick_register_handler(metal_irq_handler hd, void *priv) +{ + ipi_chnl.ipi_kick_cb = hd; + ipi_chnl.ipi_kick_priv = priv; +} + +int init_ipi(void) +{ + int ret; + int val = 0; + + 
atomic_init(&ipi_chnl.ipi_enabled, 0); + atomic_init(&ipi_chnl.ipi_handling_wakeup, 0); + ipi_chnl.kickf = RPROC_SYSFS_KICK; + ipi_chnl.rkickf = RPROC_SYSFS_REMOTE_KICK; + + ret = open(ipi_chnl.rkickf, O_RDWR); + if (ret < 0) { + LPERROR("failed to open %s, %s.\n", + ipi_chnl.rkickf, strerror(ret)); + return ret; + } + ipi_chnl.rkick_fd = ret; + /** Write to the remote kick fd to clear pending IPI interrupt */ + ret = write(ipi_chnl.rkick_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to write to %s, %s.\n", + ipi_chnl.rkickf, strerror(ret)); + close(ipi_chnl.rkick_fd); + return ret; + } + + ipi_handling_stop_fd = eventfd(0,0); + if (ipi_handling_stop_fd < 0) { + LPERROR("failed to create ipi handling stop fd.\n"); + close(ipi_chnl.rkick_fd); + return ipi_handling_stop_fd; + } + + ret = pthread_create(&ipi_pthread, NULL, _ipi_kick_handling, NULL); + if (ret < 0) { + LPERROR("failed to create IPI handling thread.\n"); + close(ipi_chnl.rkick_fd); + close(ipi_handling_stop_fd); + return ret; + } + return 0; +} + +void deinit_ipi(void) +{ + int ret; + uint64_t val = 1; + + ret = write(ipi_handling_stop_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to notify deinit ipi, %s.\n", strerror(ret)); + } + atomic_fetch_add(&ipi_chnl.ipi_handling_wakeup, 1); + syscall(SYS_futex, &ipi_chnl.ipi_handling_wakeup, FUTEX_WAKE, + 1, NULL, NULL, 0); + ret = pthread_join(ipi_pthread, NULL); + if (ret) { + LPERROR("failed to join IPI thread: %d.\n", ret); + } + close(ipi_handling_stop_fd); + close(ipi_chnl.rkick_fd); +} + +void kick_ipi(void *msg) +{ + int fd, ret; + int buf = -1; + + (void)msg; + fd = open(ipi_chnl.kickf, O_WRONLY); + if (fd < 0) { + LPERROR("failed to open %s.\n", ipi_chnl.kickf); + return; + } + + ret = write(fd, &buf, sizeof(buf)); + if (ret <= 0) + LPERROR("failed to write to %s, %s.\n", + ipi_chnl.kickf, strerror(ret)); + close(fd); +} + +void disable_ipi_kick(void) +{ + _disable_ipi_intr(&ipi_chnl); +} + +void enable_ipi_kick(void) +{ + 
_enable_ipi_intr(&ipi_chnl); +} From 81ca9950fc9df5c1c7683669c2e750631bcef2c7 Mon Sep 17 00:00:00 2001 From: Wendy Liang Date: Mon, 18 Mar 2019 08:56:01 -0700 Subject: [PATCH 03/24] examples: linux: zynqmp: Use IPI common API Change to use IPI common API so that it can switch between the IPI uio implementation and the IPI rproc sysfs implementation. Signed-off-by: Wendy Liang --- .../zynqmp/zynqmp_amp_demo/CMakeLists.txt | 72 +++ .../linux/zynqmp/zynqmp_amp_demo/common.h | 234 ++++++++++ .../zynqmp/zynqmp_amp_demo/ipi_latency_demo.c | 262 +++++++++++ .../zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c | 358 ++++++++++++++ .../zynqmp_amp_demo/libmetal_amp_demo.c | 135 ++++++ .../zynqmp_amp_demo/libmetal_amp_demod.c | 379 +++++++++++++++ .../zynqmp_amp_demo/shmem_atomic_demo.c | 147 ++++++ .../linux/zynqmp/zynqmp_amp_demo/shmem_demo.c | 196 ++++++++ .../zynqmp_amp_demo/shmem_latency_demo.c | 319 +++++++++++++ .../zynqmp_amp_demo/shmem_throughput_demo.c | 436 ++++++++++++++++++ .../linux/zynqmp/zynqmp_amp_demo/sys_init.c | 31 ++ .../linux/zynqmp/zynqmp_amp_demo/sys_init.h | 16 + 12 files changed, 2585 insertions(+) create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/common.h create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c create mode 100644
examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c create mode 100644 examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt new file mode 100644 index 00000000..aa94d8f9 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt @@ -0,0 +1,72 @@ +collector_list (_list PROJECT_INC_DIRS) +include_directories (${_list} ${CMAKE_CURRENT_SOURCE_DIR}) + +collector_list (_list PROJECT_LIB_DIRS) +link_directories (${_list}) + +collector_list (_deps PROJECT_LIB_DEPS) + +set (_src_common ${CMAKE_CURRENT_SOURCE_DIR}/sys_init.c) +get_property (_ec_flgs GLOBAL PROPERTY "PROJECT_EC_FLAGS") +foreach (_app libmetal_amp_demo libmetal_amp_demod) + set (_src ${CMAKE_CURRENT_SOURCE_DIR}/${_app}.c) + list(APPEND _src ${_src_common}) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi-uio.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demo.c) + if (WITH_SHARED_LIB) + add_executable (${_app}-share ${_src}) + if (PROJECT_EC_FLAGS) + string(REPLACE " " ";" _ec_flgs ${PROJECT_EC_FLAGS}) + target_compile_options (${_app}-share PUBLIC ${_ec_flgs}) + endif (PROJECT_EC_FLAGS) + target_link_libraries (${_app}-share ${PROJECT_NAME}-shared ${_deps}) + install (TARGETS ${_app}-share RUNTIME DESTINATION bin) + add_dependencies (${_app}-share ${PROJECT_NAME}-shared) + endif (WITH_SHARED_LIB) + + if (WITH_STATIC_LIB) + if (${PROJECT_SYSTEM} STREQUAL "linux") + add_executable (${_app}-static ${_src}) + if (PROJECT_EC_FLAGS) + string(REPLACE " " ";" _ec_flgs ${PROJECT_EC_FLAGS}) + 
target_compile_options (${_app}-static PUBLIC ${_ec_flgs}) + endif (PROJECT_EC_FLAGS) + target_link_libraries (${_app}-static ${PROJECT_NAME}-static ${_deps}) + install (TARGETS ${_app}-static RUNTIME DESTINATION bin) + endif (${PROJECT_SYSTEM} STREQUAL "linux") + endif (WITH_STATIC_LIB) +endforeach (_app) + +foreach (_app libmetal_amp_mb_shmem) + set (_src ${CMAKE_CURRENT_SOURCE_DIR}/libmetal_amp_demo.c) + list(APPEND _src ${_src_common}) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi-mb.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demo.c) + if (WITH_SHARED_LIB) + add_executable (${_app}-share ${_src}) + target_compile_options (${_app}-share PUBLIC ${_ec_flgs}) + target_link_libraries (${_app}-share ${PROJECT_NAME}-shared ${_deps}) + install (TARGETS ${_app}-share RUNTIME DESTINATION bin) + add_dependencies (${_app}-share ${PROJECT_NAME}-shared) + endif (WITH_SHARED_LIB) + + if (WITH_STATIC_LIB) + if (${PROJECT_SYSTEM} STREQUAL "linux") + add_executable (${_app}-static ${_src}) + target_compile_options (${_app}-static PUBLIC ${_ec_flgs}) + target_link_libraries (${_app}-static ${PROJECT_NAME}-static ${_deps}) + install (TARGETS ${_app}-static RUNTIME DESTINATION bin) + endif (${PROJECT_SYSTEM} STREQUAL "linux") + endif (WITH_STATIC_LIB) +endforeach (_app) + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h new file mode 100644 index 00000000..26e72dc4 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. 
+ * Copyright (C) 2022, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include +#include +#include +#include +#include + +#define BUS_NAME "platform" +#define IPI_DEV_NAME "ff340000.ipi" +#define SHM_DEV_NAME "3ed80000.shm" +#define TTC_DEV_NAME "ff110000.timer" + +/* Apply this snippet to the device tree in an overlay so that + * Linux userspace can see and use TTC0: + * &TTC0 { + * compatible = "ttc0_libmetal_demo"; + * status = "okay"; + * }; + */ + + +/* IPI registers offset */ +#define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ +#define IPI_OBS_OFFSET 0x4 /* IPI observation reg offset */ +#define IPI_ISR_OFFSET 0x10 /* IPI interrupt status reg offset */ +#define IPI_IMR_OFFSET 0x14 /* IPI interrupt mask reg offset */ +#define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ +#define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ + +#define IPI_MASK 0x100 /* IPI mask for kick from RPU. */ + +/* TTC counter offsets */ +#define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ +#define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ +#define XTTCPS_CNT_VAL_OFFSET 0x18 /* TTC counter val reg offset */ +#define XTTCPS_CNT_OFFSET(ID) ((ID) == 1 ? 0 : 1 << (ID)) /* TTC counter offset + ID is from 1 to 3 */ + +/* TTC counter control masks */ +#define XTTCPS_CNT_CNTRL_RST_MASK 0x10U /* TTC counter control reset mask */ +#define XTTCPS_CNT_CNTRL_DIS_MASK 0x01U /* TTC counter control disable mask */ + +#define LPRINTF(format, ...) \ + printf("CLIENT> " format, ##__VA_ARGS__) + +#define LPERROR(format, ...) LPRINTF("ERROR: " format, ##__VA_ARGS__) + +/** + * @brief shmem_demo() - Show use of shared memory with Libmetal. + * For NUM_TIMES times, send message to RPU and notify RPU by writing to + * share mem that RPU is polling. 
Once detected, RPU will then similarly + * write message and notify APU and the APU will then verify the + * response. If the message does not match expected response, record + * error. Afterwards, report test result and clean up. + * Notes: + * * The RPU will repeatedly wait for shared mem. from APU until APU + * notifies remote by changing the KEEP_GOING value in shared memory. + * + * @return - return 0 on success, otherwise return error number indicating + * type of error + */ +int shmem_demo(); + +/** + * @brief ipi_shmem_demo() - shared memory IPI demo + * This task will: + * * Get the timestamp and put it into the ping shared memory + * * Update the shared memory descriptor for the new available + * ping buffer. + * * Trigger IPI to notify the remote. + * * Repeat the above steps until it sends out all the packages. + * * Monitor IPI interrupt, verify every received package. + * * After all the packages are received, it sends out shutdown + * message to the remote. + * + * @return - return 0 on success, otherwise return error number indicating + * type of error. + */ +int ipi_shmem_demo(); + +/** + * @brief atomic_shmem_demo() - Shared memory atomic operation demo + * This task will: + * - Write to shared memory to notify the remote to start atomic add + * on the shared memory descriptor memory for 1000 times. + * - Start atomic add by 1 for 1000 times to first 32 bits of memory + * in the shared memory location at 3ed00000 which is + * pointed to by shm_io. + * - Wait for the remote to write to shared memory + * - Once it received the polling kick from the remote, it will check + * if the value stored in the shared memory for the atomic add is + * 2000. + * - It will print if the atomic add test has passed or not. + * + * @param[in] channel- hold shared mem. device + * @return - If setup failed, return the corresponding error number. Otherwise + * return 0 on success.
+ */ +int atomic_shmem_demo(); + +/** + * @brief ipi_latency_demo() - Show performance of IPI with Libmetal. + * For NUM_TIMES times, repeatedly send an IPI from APU and then detect + * this IPI from RPU and measure the latency. Similarly, measure the + * latency from RPU to APU. Each iteration, record this latency and + * after the loop has finished, report the average latency in nanoseconds. + * Notes: + * * The RPU will repeatedly wait for IPI from APU until APU notifies + * remote by changing the KEEPGOING value in shared memory. + * * To further ensure the accuracy of the readings a different thread + * (i.e. the IRQ handler) will stop the timer measuring RPU to APU + * latency. + * + * @return - 0 on success, error code if failure. + */ +int ipi_latency_demo(); + +/** + * @brief shmem_latency_demo() - Show performance of shared memory + * For 8, 512, and 1024 bytes, measure latency from block write to block + * read on remote side in shared memory. For each size, find average + * latency by running NUM_TIMES times and reporting the average latency + * for both APU block write to RPU block read as well as RPU block write + * to APU block read. + * + * @return - 0 on success, error code if failure. + */ +int shmem_latency_demo(); + +/** + * @brief shmem_throughput_demo() - Show performance of shared memory + * Record average throughput for APU block read, write, RPU block read + * and write for sizes 1/2KB, 1KB and 2KB. For each size, run 1000 times + * each operation and record average. + * + * @return - 0 on success, error code if failure. + */ +int shmem_throughput_demo(); + +/** + * @brief wait_for_notified() - Loop until notified bit in channel is set.
+ * + * @param[in] notified - pointer to the notified variable + */ +static inline void wait_for_notified(atomic_flag *notified) +{ + unsigned int flags; + + do { + + flags = metal_irq_save_disable(); + if (!atomic_flag_test_and_set(notified)) { + metal_irq_restore_enable(flags); + break; + } + metal_cpu_yield(); + metal_irq_restore_enable(flags); + } while(1); +} + +/** + * @brief dump_buffer() - print hex value of each byte in the buffer + * + * @param[in] buf - pointer to the buffer + * @param[in] len - len of the buffer + */ +static inline void dump_buffer(void *buf, unsigned int len) +{ + unsigned int i; + unsigned char *tmp = (unsigned char *)buf; + + for (i = 0; i < len; i++) { + printf(" %02x", *(tmp++)); + if (!(i % 20)) + printf("\n"); + } + printf("\n"); +} + +/** + * @brief print_demo() - print demo string + * + * @param[in] name - demo name + */ +static inline void print_demo(char *name) +{ + LPRINTF("****** libmetal demo: %s ******\n", name); +} + +/** + * @brief ipi_kick_register_handler() - register for IPI kick handler + * + * @param[in] hd - handler function + * @param[in] priv - private data will be passed to the handler + */ +void ipi_kick_register_handler(metal_irq_handler hd, void *priv); + +/** + * @brief init_ipi() - Initialize IPI + * + * @return return 0 for success, negative value for failure. 
+ */ +int init_ipi(void); + +/** + * @brief deinit_ipi() - Deinitialize IPI + */ +void deinit_ipi(void); + +/** + * @brief kick_ipi() - kick remote with IPI + */ +void kick_ipi(void *msg); + +/** + * @brief disable_ipi_kick() - disable IPI interrupt from remote kick + */ +void disable_ipi_kick(void); + +/** + * @brief enable_ipi_kick() - enable IPI interrupt from remote kick + */ +void enable_ipi_kick(void); + +#endif /* __COMMON_H__ */ diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c new file mode 100644 index 00000000..53efaaba --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_latency_demo.c + * This demo measures the IPI latency between the APU and RPU. + * This demo does the following steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5. Reset the APU to RPU TTC counter and then kick IPI to notify the + * remote. + * 6. When it receives IPI interrupt, the IPI interrupt handler to stop + * the RPU to APU TTC counter. + * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */ +#define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */ + +#define TTC_CLK_FREQ_HZ 100000000 +#define NS_PER_SEC 1000000000 + +/* Shared memory offset */ +#define SHM_DEMO_CNTRL_OFFSET 0x0 + +#define DEMO_STATUS_IDLE 0x0 +#define DEMO_STATUS_START 0x1 /* Status value to indicate demo start */ + +#define ITERATIONS 1000 + +struct channel_s { + struct metal_device *shm_dev; /* Shared memory metal device */ + struct metal_io_region *shm_io; /* Shared memory metal i/o region */ + struct metal_device *ttc_dev; /* TTC metal device */ + struct metal_io_region *ttc_io; /* TTC metal i/o region */ + atomic_flag remote_nkicked; /* 0 - kicked from remote */ +}; + +/** + * @brief read_timer() - return TTC counter value + * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter ID + */ +static inline uint32_t read_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + unsigned long offset = XTTCPS_CNT_VAL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + return metal_io_read32(ttc_io, offset); +} + +/** + * @brief reset_timer() - function to reset TTC counter + * Set the RST bit in the Count Control Reg. + * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter id + */ +static inline void reset_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + uint32_t val; + unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + val = XTTCPS_CNT_CNTRL_RST_MASK; + metal_io_write32(ttc_io, offset, val); +} + +/** + * @brief stop_timer() - function to stop TTC counter + * Set the disable bit in the Count Control Reg. 
+ * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter id + */ +static inline void stop_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + uint32_t val; + unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + val = XTTCPS_CNT_CNTRL_DIS_MASK; + metal_io_write32(ttc_io, offset, val); +} + +/** + * @brief ipi_irq_handler() - IPI interrupt handler + * It will clear the notified flag to mark it's got an IPI interrupt. + * It will stop the RPU->APU timer and will clear the notified + * flag to mark it's got an IPI interrupt + * + * @param[in] vect_id - IPI interrupt vector ID + * @param[in/out] priv - communication channel data for this application. + * + * @return - If the IPI interrupt is triggered by its remote, it returns + * METAL_IRQ_HANDLED. It returns METAL_IRQ_NOT_HANDLED, if it is + * not the interrupt it expected. + * + */ +static int ipi_irq_handler (int vect_id, void *priv) +{ + struct channel_s *ch = (struct channel_s *)priv; + + (void)vect_id; + + if (ch) { + /* stop RPU -> APU timer */ + stop_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU); + atomic_flag_clear(&ch->remote_nkicked); + return METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +/** + * @brief measure_ipi_latency() - Measure latency of IPI + * Repeatedly kick IPI to notify the remote and then wait for IPI kick + * from RPU and measure the latency. Similarly, measure the latency + * from RPU to APU. Each iteration, record this latency and after the + * loop has finished, report the average latency in nanoseconds. + * Notes: + * - RPU will repeatedly wait for IPI from APU until APU + * notifies remote demo has finished by setting the value in the + * shared memory. + * + * @param[in] ch - channel information, which contains the IPI i/o region, + * shared memory i/o region and the ttc timer i/o region. + * @return - 0 on success, error code if failure.
+ */ +static int measure_ipi_latency(struct channel_s *ch) +{ + uint32_t apu_to_rpu_sum = 0, rpu_to_apu_sum = 0; + int i; + + LPRINTF("Starting IPI latency task\n"); + /* write to shared memory to indicate demo has started */ + metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START); + + for ( i = 1; i <= ITERATIONS; i++) { + /* Reset TTC counter */ + reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + /* Kick IPI to notify the remote */ + kick_ipi(NULL); + /* irq handler stops timer for rpu->apu irq */ + wait_for_notified(&ch->remote_nkicked); + + apu_to_rpu_sum += read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + rpu_to_apu_sum += read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU); + } + + /* write to shared memory to indicate demo has finished */ + metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, 0); + /* Kick IPI to notify the remote */ + kick_ipi(NULL); + + /* report avg latencies */ + LPRINTF("IPI latency result with %i iterations:\n", ITERATIONS); + LPRINTF("APU to RPU average latency: %u ns \n", + apu_to_rpu_sum / ITERATIONS * NS_PER_SEC / TTC_CLK_FREQ_HZ ); + LPRINTF("RPU to APU average latency: %u ns \n", + rpu_to_apu_sum / ITERATIONS * NS_PER_SEC / TTC_CLK_FREQ_HZ ); + LPRINTF("Finished IPI latency task\n"); + return 0; +} + +int ipi_latency_demo() +{ + struct metal_device *dev; + struct metal_io_region *io; + struct channel_s ch; + int ret = 0; + + print_demo("IPI latency"); + memset(&ch, 0, sizeof(ch)); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Get shared memory device IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + goto out; + } + ch.shm_dev = dev; + ch.shm_io = io; + + /* Open TTC device */ + ret = metal_device_open(BUS_NAME, TTC_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", TTC_DEV_NAME); + 
goto out; + } + + /* Get TTC IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + goto out; + } + ch.ttc_dev = dev; + ch.ttc_io = io; + + /* initialize remote_nkicked */ + ch.remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&ch.remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, &ch); + enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = measure_ipi_latency(&ch); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (ch.ttc_dev) + metal_device_close(ch.ttc_dev); + if (ch.shm_dev) + metal_device_close(ch.shm_dev); + return ret; + +} + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c new file mode 100644 index 00000000..c5ef173f --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_shmem_demo.c - shared memory with IPI demo + * This demo will: + * 1. Open the shared memory device. + * 2. Open the IPI device. + * 3. Register IPI interrupt handler. + * 4. Write message to the shared memory. + * 5. Kick IPI to notify there is a message written to the shared memory + * 6. Wait until the remote has kicked the IPI to notify the remote + * has echoed back the message. + * 7. Read the message from shared memory. + * 8. Verify the message + * 9. Repeat step 4 to 8 for 100 times. + * 10. Clean up: deregister the IPI interrupt handler, close the IPI device + * , close the shared memory device. 
+ * + * Here is the Shared memory structure of this demo: + * |0x0 - 0x03 | number of APU to RPU buffers available to RPU | + * |0x04 - 0x07 | number of APU to RPU buffers consumed by RPU | + * |0x08 - 0x1FFC | address array for shared buffers from APU to RPU | + * |0x2000 - 0x2003 | number of RPU to APU buffers available to APU | + * |0x2004 - 0x2007 | number of RPU to APU buffers consumed by APU | + * |0x2008 - 0x3FFC | address array for shared buffers from RPU to APU | + * |0x04000 - 0x103FFC | APU to RPU buffers | + * |0x104000 - 0x203FFC | RPU to APU buffers | + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* Shared memory offsets */ +#define SHM_DESC_OFFSET_TX 0x0 +#define SHM_BUFF_OFFSET_TX 0x04000 +#define SHM_DESC_OFFSET_RX 0x02000 +#define SHM_BUFF_OFFSET_RX 0x104000 + +/* Shared memory descriptors offset */ +#define SHM_DESC_AVAIL_OFFSET 0x00 +#define SHM_DESC_USED_OFFSET 0x04 +#define SHM_DESC_ADDR_ARRAY_OFFSET 0x08 + +#define PKGS_TOTAL 1024 + +#define BUF_SIZE_MAX 512 +#define SHUTDOWN "shutdown" + +#define NS_PER_S (1000 * 1000 * 1000) + +struct msg_hdr_s { + uint32_t index; + uint32_t len; +}; + +static atomic_flag remote_nkicked; /* is remote kicked, 0 - kicked, + 1 - not-kicked */ + +/** + * @brief get_timestamp() - Get the timestamp + * IT gets the timestamp and return nanoseconds. + * + * @return nano seconds. 
+ */
+static unsigned long long get_timestamp (void)
+{
+	struct timespec tp;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &tp) == -1) {
+		LPERROR("Bad clock_gettime!\n");
+		/* 0 is the "no timestamp" value callers receive on failure */
+		return 0;
+	}
+
+	/*
+	 * Widen tv_sec before multiplying: the product is otherwise computed
+	 * in the width of time_t and overflows where time_t is 32 bits.
+	 */
+	return (unsigned long long)tp.tv_sec * NS_PER_S +
+	       (unsigned long long)tp.tv_nsec;
+}
+
+/**
+ * @brief ipi_irq_handler() - IPI kick callback
+ *        Clears remote_nkicked to record that the remote has kicked.
+ *
+ * @param[in] vect_id - interrupt vector ID (unused)
+ * @param[in] priv - callback private data (unused)
+ *
+ * @return METAL_IRQ_HANDLED always.
+ */
+static int ipi_irq_handler (int vect_id, void *priv)
+{
+	(void)vect_id;
+	(void)priv;
+
+	atomic_flag_clear(&remote_nkicked);
+	return METAL_IRQ_HANDLED;
+}
+
+/**
+ * @brief ipi_shmem_echo() - shared memory IPI demo
+ *        This task will:
+ *        * Get the timestamp and put it into the ping shared memory
+ *        * Update the shared memory descriptor for the new available
+ *          ping buffer.
+ *        * Trigger IPI to notify the remote.
+ *        * Repeat the above steps until it sends out all the packages.
+ *        * Monitor IPI interrupt, verify every received package.
+ *        * After all the packages are received, it sends out shutdown
+ *          message to the remote.
+ *
+ * @param[in] shm_io - shared memory metal i/o region
+ * @return - return 0 on success, otherwise return error number indicating
+ *           type of error.
+ */ +static int ipi_shmem_echo(struct metal_io_region *shm_io) +{ + int ret; + uint32_t i; + uint32_t rx_avail; + unsigned long tx_avail_offset, rx_avail_offset; + unsigned long rx_used_offset; + unsigned long tx_addr_offset, rx_addr_offset; + unsigned long tx_data_offset, rx_data_offset; + unsigned long long tstart, tend; + long long tdiff; + long long tdiff_avg_s = 0, tdiff_avg_ns = 0; + void *txbuf = NULL, *rxbuf = NULL, *tmpptr; + struct msg_hdr_s *msg_hdr; + uint32_t tx_phy_addr_32; + + txbuf = metal_allocate_memory(BUF_SIZE_MAX); + if (!txbuf) { + LPERROR("Failed to allocate local tx buffer for msg.\n"); + ret = -ENOMEM; + goto out; + } + rxbuf = metal_allocate_memory(BUF_SIZE_MAX); + if (!rxbuf) { + LPERROR("Failed to allocate local rx buffer for msg.\n"); + ret = -ENOMEM; + goto out; + } + + /* Clear shared memory */ + metal_io_block_set(shm_io, 0, 0, metal_io_region_size(shm_io)); + + /* Set tx/rx buffer address offset */ + tx_avail_offset = SHM_DESC_OFFSET_TX + SHM_DESC_AVAIL_OFFSET; + rx_avail_offset = SHM_DESC_OFFSET_RX + SHM_DESC_AVAIL_OFFSET; + rx_used_offset = SHM_DESC_OFFSET_RX + SHM_DESC_USED_OFFSET; + tx_addr_offset = SHM_DESC_OFFSET_TX + SHM_DESC_ADDR_ARRAY_OFFSET; + rx_addr_offset = SHM_DESC_OFFSET_RX + SHM_DESC_ADDR_ARRAY_OFFSET; + tx_data_offset = SHM_DESC_OFFSET_TX + SHM_BUFF_OFFSET_TX; + rx_data_offset = SHM_DESC_OFFSET_RX + SHM_BUFF_OFFSET_RX; + + LPRINTF("Start echo flood testing....\n"); + LPRINTF("Sending msgs to the remote.\n"); + + for (i = 0; i < PKGS_TOTAL; i++) { + + /* Construct a message to send */ + tmpptr = txbuf; + msg_hdr = tmpptr; + msg_hdr->index = i; + msg_hdr->len = sizeof(tstart); + tmpptr += sizeof(struct msg_hdr_s); + tstart = get_timestamp(); + *(unsigned long long *)tmpptr = tstart; + + /* copy message to shared buffer */ + metal_io_block_write(shm_io, tx_data_offset, msg_hdr, + sizeof(struct msg_hdr_s) + msg_hdr->len); + + /* Write to the address array to tell the other end + * the buffer address. 
+ */ + tx_phy_addr_32 = (uint32_t)metal_io_phys(shm_io, + tx_data_offset); + metal_io_write32(shm_io, tx_addr_offset, tx_phy_addr_32); + tx_data_offset += sizeof(struct msg_hdr_s) + msg_hdr->len; + tx_addr_offset += sizeof(uint32_t); + + /* Increase number of available buffers */ + metal_io_write32(shm_io, tx_avail_offset, (i + 1)); + /* Kick IPI to notify data has been put to shared buffer */ + kick_ipi(NULL); + } + + LPRINTF("Waiting for messages to echo back and verify.\n"); + i = 0; + tx_data_offset = SHM_DESC_OFFSET_TX + SHM_BUFF_OFFSET_TX; + while (i != PKGS_TOTAL) { + wait_for_notified(&remote_nkicked); + rx_avail = metal_io_read32(shm_io, rx_avail_offset); + while (i != rx_avail) { + uint32_t rx_phy_addr_32; + + /* Received pong from the other side */ + + /* Get the buffer location from the shared memory + * rx address array. + */ + rx_phy_addr_32 = metal_io_read32(shm_io, + rx_addr_offset); + rx_data_offset = metal_io_phys_to_offset(shm_io, + (metal_phys_addr_t)rx_phy_addr_32); + if (rx_data_offset == METAL_BAD_OFFSET) { + LPERROR("failed to get rx [%d] offset: 0x%x.\n", + i, rx_phy_addr_32); + ret = -EINVAL; + goto out; + } + rx_addr_offset += sizeof(rx_phy_addr_32); + + /* Read message header from shared memory */ + metal_io_block_read(shm_io, rx_data_offset, rxbuf, + sizeof(struct msg_hdr_s)); + msg_hdr = (struct msg_hdr_s *)rxbuf; + + /* Check if the message header is valid */ + if (msg_hdr->index != (uint32_t)i) { + LPERROR("wrong msg: expected: %d, actual: %d\n", + i, msg_hdr->index); + ret = -EINVAL; + goto out; + } + if (msg_hdr->len != sizeof(tstart)) { + LPERROR("wrong msg: length invalid: %lu, %u.\n", + sizeof(tstart), msg_hdr->len); + ret = -EINVAL; + goto out; + } + /* Read message */ + rx_data_offset += sizeof(*msg_hdr); + metal_io_block_read(shm_io, + rx_data_offset, + rxbuf + sizeof(*msg_hdr), msg_hdr->len); + rx_data_offset += msg_hdr->len; + /* increase rx used count to indicate it has consumed + * the received data */ + 
metal_io_write32(shm_io, rx_used_offset, (i + 1)); + + /* Verify message */ + /* Get tx message previously sent*/ + metal_io_block_read(shm_io, tx_data_offset, txbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + tx_data_offset += sizeof(*msg_hdr) + sizeof(tstart); + /* Compare the received message and the sent message */ + ret = memcmp(rxbuf, txbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + if (ret) { + LPERROR("data[%u] verification failed.\n", i); + LPRINTF("Expected:"); + dump_buffer(txbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + LPRINTF("Actual:"); + dump_buffer(rxbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + ret = -EINVAL; + goto out; + } + + i++; + } + } + tend = get_timestamp(); + tdiff = tend - tstart; + + /* Send shutdown message */ + tmpptr = txbuf; + msg_hdr = tmpptr; + msg_hdr->index = i; + msg_hdr->len = strlen(SHUTDOWN); + tmpptr += sizeof(struct msg_hdr_s); + sprintf(tmpptr, SHUTDOWN); + /* copy message to shared buffer */ + metal_io_block_write(shm_io, + tx_data_offset, + msg_hdr, + sizeof(struct msg_hdr_s) + msg_hdr->len); + + tx_phy_addr_32 = (uint32_t)metal_io_phys(shm_io, + tx_data_offset); + metal_io_write32(shm_io, tx_addr_offset, tx_phy_addr_32); + metal_io_write32(shm_io, tx_avail_offset, PKGS_TOTAL + 1); + LPRINTF("Kick remote to notify shutdown message sent...\n"); + kick_ipi(NULL); + + tdiff /= PKGS_TOTAL; + tdiff_avg_s = tdiff / NS_PER_S; + tdiff_avg_ns = tdiff % NS_PER_S; + LPRINTF("Total packages: %d, time_avg = %lds, %ldns\n", + i, (long int)tdiff_avg_s, (long int)tdiff_avg_ns); + + ret = 0; +out: + if (txbuf) + metal_free_memory(txbuf); + if (rxbuf) + metal_free_memory(rxbuf); + return ret; +} + +int ipi_shmem_demo() +{ + struct metal_device *shm_dev = NULL; + struct metal_io_region *shm_io = NULL; + int ret = 0; + + print_demo("IPI and shared memory"); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &shm_dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* 
Get shared memory device IO region */ + shm_io = metal_device_io_region(shm_dev, 0); + if (!shm_io) { + LPERROR("Failed to map io region for %s.\n", shm_dev->name); + ret = -ENODEV; + goto out; + } + + /* initialize remote_nkicked */ + remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, NULL); + enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = ipi_shmem_echo(shm_io); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (shm_dev) + metal_device_close(shm_dev); + return ret; + +} + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c new file mode 100644 index 00000000..6dfaf7e1 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + + /***************************************************************************** + * libmetal_amp_demo.c + * + * This application shows how to use IPI to trigger interrupt and how to + * setup shared memory with libmetal API for communication between processors. + * + * This app does the following: + * 1. Run the shared memory echo demo task ipi_shmem_task() + * * Write message to the APU to RPU shared buffer. + * * Update the APU to RPU shared memory available index. + * * Trigger IPI to the remote. + * * Repeat the above 3 sub steps until it sends all the packages. + * * Wait for IPI to receive all the packages + * * If "shutdown" message is received, cleanup the libmetal source. + * 2. Run shared memory demo with shmem_task(). + * * Open shared memory device. + * * For 1000 times, communicate between local and remote processes + * using shared memory and polling via shared memory. 
+ * * Cleanup shared memory device. + * 3. Run the atomic demo task atomic_shmem_task(): + * * Trigger the IPI to the remote, the remote will then start doing atomic + * add calculation. + * * Start atomic add by 1 for 1000 times to the first 32bit of the shared + * memory descriptor location. + * * Once it receives the IPI interrupt, it will check if the value stored + * in the shared memory descriptor location is 2000. If yes, the atomic + * across the shared memory passed, otherwise, it failed. + * 4. Demonstrate IPI latency with ipi_latency_demo_task() + * * Open IPI and timer devices. + * * For 1000 times, record APU to RPU IPI latency and RPU to APU + * latency. Then report average time for each direction. + * * Cleanup libmetal resources + * 5. Demonstrate shared memory latency with shmem_latency_demo_task() + * * Open shared memory and timer devices. + * * For 1000 times, record APU to RPU shared memory latency and RPU to APU + * latency for 8 bytes, 1/2K and 1K. Then report average time for each + * direction. + * * Cleanup libmetal resources + * 6. Demonstrate shared memory throughput with shmem_throughput_demo_task() + * * Open shared memory, IPI and timer devices. + * * For 1000 times, record APU block read and write times. Notify remote + * to run test, then similarly record RPU block read and write times for + * 1/2KB, 1KB and 2KB. Then report average throughput for each data size + * and operation. + * * Cleanup libmetal resources + */ + +#include +#include +#include +#include +#include "common.h" +#include "sys_init.h" + +/** + * @brief main function of the demo application. + * Here are the steps for the main function: + * * initialize libmetal environment + * * Run the IPI with shared memory demo. + * * Run the shared memory demo. + * * Run the atomic across shared memory demo. + * * Run the ipi latency demo. + * * Run the shared memory latency demo. + * * Run the shared memory throughput demo. 
+ *        * Cleanup libmetal environment
+ *        Report if any of the above tasks failed.
+ *        Note: the code below runs the demos in this order: shared memory,
+ *        atomic shared memory, IPI with shared memory, IPI latency,
+ *        shared memory latency, shared memory throughput. The first
+ *        failing demo aborts the remaining ones.
+ * @return   0 - succeeded, non-zero for failures.
+ */
+int main(void)
+{
+	int ret;
+
+	/* Nothing to clean up yet if initialization fails: return directly */
+	ret = sys_init();
+	if (ret) {
+		LPERROR("Failed to initialize system.\n");
+		return ret;
+	}
+
+	/* First demo: no preceding demo, so no settle delay is needed */
+	ret = shmem_demo();
+	if (ret) {
+		LPERROR("shared memory demo failed.\n");
+		goto out;
+	}
+
+	/* sleep, to give time for RPU to clean up the last demo. */
+	sleep(1);
+	ret = atomic_shmem_demo();
+	if (ret) {
+		LPERROR("shared memory atomic demo failed.\n");
+		goto out;
+	}
+
+	/* sleep, to give time for RPU to clean up the last demo. */
+	sleep(1);
+	ret = ipi_shmem_demo();
+	if (ret) {
+		LPERROR("IPI and shared memory demo failed.\n");
+		goto out;
+	}
+
+	/* sleep, to give time for RPU to clean up the last demo. */
+	sleep(1);
+	ret = ipi_latency_demo();
+	if (ret) {
+		LPERROR("IPI latency demo failed.\n");
+		goto out;
+	}
+
+	/* sleep, to give time for RPU to clean up the last demo. */
+	sleep(1);
+	ret = shmem_latency_demo();
+	if (ret) {
+		LPERROR("shared memory latency demo failed.\n");
+		goto out;
+	}
+
+	/* sleep, to give time for RPU to clean up the last demo. */
+	sleep(1);
+	ret = shmem_throughput_demo();
+	if (ret) {
+		LPERROR("shared memory throughput demo failed.\n");
+		goto out;
+	}
+
+out:
+	/* Reached on success and on any demo failure; ret carries the result */
+	sys_cleanup();
+
+	return ret;
+}
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c
new file mode 100644
index 00000000..653ecac9
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2016, Xilinx Inc. and Contributors. All rights reserved.
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sys_init.h" + +#define IPI_TRIG_OFFSET 0x0 +#define IPI_OBS_OFFSET 0x4 +#define IPI_ISR_OFFSET 0x10 +#define IPI_IMR_OFFSET 0x14 +#define IPI_IER_OFFSET 0x18 +#define IPI_IDR_OFFSET 0x1C + +#define IPI_MASK 0x1000000 + +#define IPI_DEV_NAME "ff310000.ipi" +#define SHM0_DESC_DEV_NAME "3ed00000.shm_desc" +#define SHM1_DESC_DEV_NAME "3ed10000.shm_desc" +#define SHM_DEV_NAME "3ed20000.shm" +#define BUS_NAME "platform" +#define D0_SHM_OFFSET 0x00000 +#define D1_SHM_OFFSET 0x20000 + +#define BUF_SIZE_MAX 512 +#define SHUTDOWN "shutdown" + +#define LPRINTF(format, ...) \ + printf("SERVER> " format, ##__VA_ARGS__) + +struct shm_mg_s { + uint32_t avails; + uint32_t used; +}; + +typedef uint64_t shm_addr_t; + +struct msg_hdr_s { + uint32_t index; + int32_t len; +}; + +struct channel_s { + struct metal_device *ipi_dev; + struct metal_io_region *ipi_io; + unsigned int ipi_mask; + struct metal_device *shm0_desc_dev; + struct metal_io_region *shm0_desc_io; + struct metal_device *shm1_desc_dev; + struct metal_io_region *shm1_desc_io; + struct metal_device *shm_dev; + struct metal_io_region *shm_io; + atomic_int notified; + unsigned long d0_start_offset; + unsigned long d1_start_offset; +}; + +static struct channel_s ch0; + +extern int system_init(); +extern int run_comm_task(void *task, void *arg); +extern void wait_for_interrupt(void); + +static int ipi_irq_isr (int vect_id, void *priv) +{ + (void)vect_id; + struct channel_s *ch = (struct channel_s *)priv; + uint64_t val = 1; + + if (!ch) + return METAL_IRQ_NOT_HANDLED; + val = metal_io_read32(ch->ipi_io, IPI_ISR_OFFSET); + if (val & ch->ipi_mask) { + metal_io_write32(ch->ipi_io, IPI_ISR_OFFSET, ch->ipi_mask); + atomic_flag_clear(&ch->notified); + return METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +static int ipi_task_shm_atomicd(void *arg) 
+{ + struct channel_s *ch = (struct channel_s *)arg; + atomic_int *shm_int; + unsigned int flags; + int i; + + shm_int = (atomic_int *)metal_io_virt(ch->shm0_desc_io, 0); + + LPRINTF("Wait for atomic test to start.\n"); + while (1) { + do { + flags = metal_irq_save_disable(); + if (!atomic_flag_test_and_set(&ch->notified)) { + metal_irq_restore_enable(flags); + break; + } + wait_for_interrupt(); + metal_irq_restore_enable(flags); + } while(1); + for (i = 0; i < 1000; i++) + atomic_fetch_add(shm_int, 1); + //*((unsigned int volatile *)shm_int) += 1; + /* memory barrier */ + atomic_thread_fence(memory_order_acq_rel); + + /* Send the message */ + LPRINTF("SENDING message...\n"); + metal_io_write32(ch->ipi_io, IPI_TRIG_OFFSET, ch->ipi_mask); + break; + } + + return 0; +} + +static int ipi_task_echod(void *arg) +{ + struct channel_s *ch = (struct channel_s *)arg; + struct shm_mg_s *shm0_mg, *shm1_mg; + shm_addr_t *shm0_addr_array, *shm1_addr_array; + struct msg_hdr_s *msg_hdr; + unsigned int flags; + void *d0, *d1, *lbuf; + metal_phys_addr_t d0_pa; + int len; + + shm0_mg = (struct shm_mg_s *)metal_io_virt(ch->shm0_desc_io, 0); + shm1_mg = (struct shm_mg_s *)metal_io_virt(ch->shm1_desc_io, 0); + shm0_addr_array = (void *)shm0_mg + sizeof(struct shm_mg_s); + shm1_addr_array = (void *)shm1_mg + sizeof(struct shm_mg_s); + d1 = metal_io_virt(ch->shm_io, ch->d1_start_offset); + lbuf = malloc(BUF_SIZE_MAX); + if (!lbuf) { + LPRINTF("ERROR: Failed to allocate local buffer for msg.\n"); + return -1; + } + + LPRINTF("Wait for echo test to start.\n"); + while (1) { + do { + flags = metal_irq_save_disable(); + if (!atomic_flag_test_and_set(&ch->notified)) { + metal_irq_restore_enable(flags); + break; + } + wait_for_interrupt(); + metal_irq_restore_enable(flags); + } while(1); + atomic_thread_fence(memory_order_acq_rel); + while(shm0_mg->used != shm0_mg->avails) { + d0_pa = (metal_phys_addr_t)shm0_addr_array[shm0_mg->used]; + d0 = metal_io_phys_to_virt(ch->shm_io, d0_pa); + if (!d0) 
{ + LPRINTF("ERROR: failed to get rx addr:0x%lx.\n", + d0_pa); + goto out; + } + /* Copy msg header from shared buf to local mem */ + len = metal_io_block_read(ch->shm_io, + metal_io_virt_to_offset(ch->shm_io, d0), + lbuf, sizeof(struct msg_hdr_s)); + if (len < (int)sizeof(struct msg_hdr_s)) { + LPRINTF("ERROR: failed to get msg header.\n"); + goto out; + } + msg_hdr = lbuf; + if (msg_hdr->len < 0) { + LPRINTF("ERROR: wrong msg length: %d.\n", + (int)msg_hdr->len); + goto out; + } else { + /* copy msg data from shared buf to local mem */ + d0 += sizeof(struct msg_hdr_s); + len = metal_io_block_read(ch->shm_io, + metal_io_virt_to_offset(ch->shm_io, d0), + lbuf + sizeof(struct msg_hdr_s), + msg_hdr->len); +#if DEBUG + LPRINTF("received: %d, %d\n", + (int)msg_hdr->index, (int)msg_hdr->len); +#endif + /* Check if the it is the shutdown message */ + if (!strncmp((lbuf + sizeof(struct msg_hdr_s)), + SHUTDOWN, sizeof(SHUTDOWN))) { + LPRINTF("Received shutdown message\n"); + goto out; + } + } + /* Copy the message back to the other end */ + metal_io_block_write(ch->shm_io, + metal_io_virt_to_offset(ch->shm_io, d1), + lbuf, + sizeof(struct msg_hdr_s) + msg_hdr->len); + + /* Update the d1 address */ + shm1_addr_array[shm1_mg->avails] = + (uint64_t)metal_io_virt_to_phys( + ch->shm_io, d1); + d1 += (sizeof(struct msg_hdr_s) + msg_hdr->len); + shm0_mg->used++; + shm1_mg->avails++; + /* memory barrier */ + atomic_thread_fence(memory_order_acq_rel); + + /* Send the message */ + metal_io_write32(ch->ipi_io, IPI_TRIG_OFFSET, + ch->ipi_mask); + } + } + +out: + free(lbuf); + return 0; +} + +int main(void) +{ + struct metal_device *device; + struct metal_io_region *io; + int irq; + uint32_t val; + int ret = 0; + + ret = sys_init(); + if (ret) { + LPRINTF("ERROR: Failed to initialize system\n"); + return -1; + } + memset(&ch0, 0, sizeof(ch0)); + + atomic_store(&ch0.notified, 1); + + /* Open IPI device */ + ret = metal_device_open(BUS_NAME, IPI_DEV_NAME, &device); + if (ret) { + 
LPRINTF("ERROR: Failed to open device %s.\n", IPI_DEV_NAME); + goto out; + } + + /* Map IPI device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + + /* Store the IPI device and I/O region */ + ch0.ipi_dev = device; + ch0.ipi_io = io; + + /* Open shared memory0 descriptor device */ + ret = metal_device_open(BUS_NAME, SHM0_DESC_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", + SHM0_DESC_DEV_NAME); + goto out; + } + + /* Map shared memory0 descriptor device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + /* Store the shared memory0 descriptor device and I/O region */ + ch0.shm0_desc_dev = device; + ch0.shm0_desc_io = io; + + /* Open shared memory1 descriptor device */ + ret = metal_device_open(BUS_NAME, SHM1_DESC_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", + SHM1_DESC_DEV_NAME); + goto out; + } + + /* Map shared memory1 descriptor device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + /* Store the shared memory0 descriptor device and I/O region */ + ch0.shm1_desc_dev = device; + ch0.shm1_desc_io = io; + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Map shared memory device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + + /* Store the shared 
memory device and I/O region */ + ch0.shm_dev = device; + ch0.shm_io = io; + ch0.d1_start_offset = D1_SHM_OFFSET; + + /* Get interrupt ID from IPI metal device */ + irq = (intptr_t)ch0.ipi_dev->irq_info; + if (irq < 0) { + LPRINTF("ERROR: Failed to request interrupt for %s.\n", + device->name); + ret = -EINVAL; + goto out; + } + + ch0.ipi_mask = IPI_MASK; + + LPRINTF("Try to register IPI interrupt.\n"); + ret = metal_irq_register(irq, ipi_irq_isr, &ch0); + LPRINTF("registered IPI interrupt.\n"); + if (ret) + goto out; + metal_irq_enable(irq); + + /* Enable interrupt */ + metal_io_write32(ch0.ipi_io, IPI_IER_OFFSET, ch0.ipi_mask); + val = metal_io_read32(ch0.ipi_io, IPI_IMR_OFFSET); + if (val & ch0.ipi_mask) { + LPRINTF("ERROR: Failed to enable IPI interrupt.\n"); + ret = -1; + goto out; + } + LPRINTF("enabled IPI interrupt.\n"); + ret = ipi_task_shm_atomicd((void *)&ch0); + if (ret) { + LPRINTF("ERROR: Failed to run shared memory atomic task.\n"); + goto out; + } + ret = ipi_task_echod((void*)&ch0); + if (ret) + LPRINTF("ERROR: Failed to run IPI communication task.\n"); + +out: + if (ch0.ipi_dev) + metal_device_close(ch0.ipi_dev); + if (ch0.shm0_desc_dev) + metal_device_close(ch0.shm0_desc_dev); + if (ch0.shm1_desc_dev) + metal_device_close(ch0.shm1_desc_dev); + if (ch0.shm_dev) + metal_device_close(ch0.shm_dev); + sys_cleanup(); + + return ret; +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c new file mode 100644 index 00000000..017affb9 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + + /***************************************************************************** + * atomic_shmem_demod.c - Shared memory atomic operation demo + * This demo will: + * + * 1. Open the shared memory device. + * 2. Open the IPI device. + * 3. Register IPI interrupt handler. + * 4. Kick IPI to notify the other end to start the demo + * 5. Start atomic add by 1 for 5000 times over the shared memory + * 6. Wait for remote IPI kick to know when the remote has finished the demo. + * 7. Verify the result. As two sides both have done 5000 times of adding 1, + * check if the end result is 5000*2. + * 8. Clean up: deregister the IPI interrupt handler, close the IPI device + * , close the shared memory device. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define ATOMIC_INT_OFFSET 0x0 /* shared memory offset for atomic operation */ +#define ITERATIONS 5000 + +static atomic_flag remote_nkicked; /* is remote kicked, 0 - kicked, + 1 - not-kicked */ + +static int ipi_irq_handler (int vect_id, void *priv) +{ + (void)vect_id; + (void)priv; + atomic_flag_clear(&remote_nkicked); + return METAL_IRQ_HANDLED; +} + +/** + * @brief atomic_add_shmem() - Shared memory atomic operation demo + * This task will: + * * Write to shared memory to notify the remote to start atomic add on + * the shared memory for 1000 times. + * * Start atomic add by 1 for 5000 times to first 32 bits of memory in + * the shared memory which is pointed to by shm_io. + * * Wait for the remote to write to shared memory + * * Once it received the polling kick from the remote, it will check + * if the value stored in the shared memory is the same as the + * expected. + * * It will print if the atomic add test has passed or not. + * @param[in] shm_io - shared memory metal i/o region + * @return - If setup failed, return the corresponding error number. Otherwise + * return 0 on success. 
+ */ +static int atomic_add_shmem(struct metal_io_region *shm_io) +{ + int i, ret; + atomic_int *shm_int; + + LPRINTF("Starting atomic shared memory task.\n"); + + /* Initialize the shared memory on which we run the atomic add */ + shm_int = (atomic_int *)metal_io_virt(shm_io, + ATOMIC_INT_OFFSET); + atomic_store(shm_int, 0); + + /* Kick the remote to notify demo starts. */ + kick_ipi(NULL); + + /* Do atomic add over the shared memory */ + for (i = 0; i < ITERATIONS; i++) { + atomic_fetch_add(shm_int, 1); + } + + /* Wait for kick from RPU to know when RPU finishes the demo */ + wait_for_notified(&remote_nkicked); + + if (atomic_load(shm_int) == (ITERATIONS << 1 )) { + LPRINTF("shm atomic demo PASSED!\n"); + ret = 0; + } else { + LPRINTF("shm atomic demo FAILED. expected: %u, actual: %u\n", + (unsigned int)(ITERATIONS << 1), atomic_load(shm_int)); + ret = -1; + } + + return ret; +} + +int atomic_shmem_demo() +{ + struct metal_device *shm_dev = NULL; + struct metal_io_region *shm_io = NULL; + int ret = 0; + + print_demo("atomic operation over shared memory"); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &shm_dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Get shared memory device IO region */ + shm_io = metal_device_io_region(shm_dev, 0); + if (!shm_io) { + LPERROR("Failed to map io region for %s.\n", shm_dev->name); + ret = -ENODEV; + goto out; + } + + /* initialize remote_nkicked */ + remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, NULL); + enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = atomic_add_shmem(shm_io); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (shm_dev) + metal_device_close(shm_dev); + return ret; + +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c 
b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c new file mode 100644 index 00000000..bf576a79 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + + /***************************************************************************** + * shmem_demo.c + * This demo demonstrates the use of shared mem. between the APU and RPU. + * This demo does so via the following steps: + * + * 1. Open the shared memory device. + * 2. Clear the demo control TX/RX available values in shared memory. + * 3. APU set demo control in shared memory to notify RPU demo has started + * 4. APU will write message to the shared memory. + * 5. APU will increase TX avail values in the shared memory to notify RPU + * there is a message ready to read. + * 6. APU will poll the RX avail value in th shared memory to see if RPU + * has echoed back the message into the shared memory. + * 7. When APU knows there is new RX message available, it will read the + * RX message from the shared memory. + * 8. APU will verify the message to see if it matches the one it has sent. + * 9. Close the shared memory device. + * + * Here is the Shared memory structure of this demo: + * |0 | 4Bytes | DEMO control status shows if demo starts or not | + * |0x04 | 4Bytes | number of APU to RPU buffers available to RPU | + * |0x08 | 4Bytes | number of APU to RPU buffers consumed by RPU | + * |0x0c | 4Bytes | number of RPU to APU buffers available to APU | + * |0x10 | 4Bytes | number of RPU to APU buffers consumed by APU | + * |0x14 | 1KBytes | APU to RPU buffer | + * ... ... 
+ * |0x800 | 1KBytes | RPU to APU buffer |
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common.h"
+
+/* Shared memory offsets */
+#define SHM_DEMO_CNTRL_OFFSET 0x0
+#define SHM_TX_AVAIL_OFFSET 0x04
+#define SHM_RX_AVAIL_OFFSET 0x0C
+#define SHM_TX_BUFFER_OFFSET 0x14
+#define SHM_RX_BUFFER_OFFSET 0x800
+
+#define SHM_BUFFER_SIZE 0x400
+
+#define DEMO_STATUS_IDLE 0x0
+#define DEMO_STATUS_START 0x1 /* Status value to indicate demo start */
+
+#define TEST_MSG "Hello World - libmetal shared memory demo"
+
+struct msg_hdr_s {
+	uint32_t index;
+	uint32_t len;
+};
+
+/**
+ * @brief shmem_echo() - Show use of shared memory with Libmetal.
+ *        Write message to RPU. RPU will then read and echo
+ *        back. Confirm if echoed message is identical.
+ *        If messages differ, report error.
+ *
+ *        Steps:
+ *        1. Clear demo control and TX/RX available values
+ *        2. Write DEMO_STATUS_START to the demo control word
+ *        3. Write header + TEST_MSG to the TX buffer, bump TX available
+ *        4. Busy-wait until RX available changes, read the RX buffer
+ *        5. Compare sent and received bytes, set demo control to idle
+ *
+ * @param[in] shm_io - metal i/o region of the shared memory
+ * @return - return 0 on success, otherwise a non-zero value: -1 on
+ *           allocation failure, the negative value returned by the
+ *           block read/write, or the memcmp() result on mismatch
+ */
+int shmem_echo(struct metal_io_region *shm_io)
+{
+	void *tx_data = NULL;
+	void *rx_data = NULL;
+	unsigned int tx_count = 0;
+	unsigned int rx_count = 0;
+	struct msg_hdr_s *msg_hdr;
+	unsigned int data_len;
+	int ret;
+
+	LPRINTF("Setting up shared memory demo.\n");
+	/* clear demo status value */
+	metal_io_write32(shm_io, SHM_DEMO_CNTRL_OFFSET, 0);
+	/* Clear TX/RX avail */
+	metal_io_write32(shm_io, SHM_TX_AVAIL_OFFSET, 0);
+	metal_io_write32(shm_io, SHM_RX_AVAIL_OFFSET, 0);
+
+	LPRINTF("Starting shared memory demo.\n");
+	/* Notify the remote the demo starts */
+	metal_io_write32(shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START);
+
+	/* preparing data to send */
+	data_len = sizeof(struct msg_hdr_s) + strlen(TEST_MSG) + 1;
+	tx_data = metal_allocate_memory(data_len);
+	if (!tx_data) {
+		LPERROR("Failed to allocate memory.\n");
+		ret = -1;
+		goto out;
+	}
+	msg_hdr = (struct msg_hdr_s *)tx_data;
+	msg_hdr->index = tx_count;
+	msg_hdr->len = strlen(TEST_MSG) + 1;
+	/* Payload follows the header; copy the text including its NUL.
+	 * memcpy avoids using TEST_MSG as a printf format string and
+	 * arithmetic on a void pointer. */
+	memcpy((char *)tx_data + sizeof(*msg_hdr), TEST_MSG, msg_hdr->len);
+	LPRINTF("Sending message: %s\n",
+		(char *)tx_data + sizeof(*msg_hdr));
+
+	/* write data to the shared memory*/
+	ret = metal_io_block_write(shm_io, SHM_TX_BUFFER_OFFSET,
+		tx_data, data_len);
+	if (ret < 0){
+		LPERROR("Unable to metal_io_block_write()\n");
+		goto out;
+	}
+	/* Increase number of buffers available to notify the remote */
+	tx_count++;
+	metal_io_write32(shm_io, SHM_TX_AVAIL_OFFSET, tx_count);
+
+	/* wait for remote to echo back the data */
+	while (metal_io_read32(shm_io, SHM_RX_AVAIL_OFFSET) == rx_count);
+	rx_count++;
+	/* New RX data is available, allocate buffer to received data */
+	rx_data = metal_allocate_memory(data_len);
+	if (!rx_data) {
+		LPERROR("Failed to allocate memory\n");
+		ret = -1;
+		goto out;
+	}
+	/* read data from the shared memory; keep the result so the
+	 * check below tests this call and not the earlier write */
+	ret = metal_io_block_read(shm_io, SHM_RX_BUFFER_OFFSET,
+		rx_data, data_len);
+	if (ret < 0){
+		LPERROR("Unable to metal_io_block_read()\n");
+		goto out;
+	}
+	/* verify the received data */
+	ret = memcmp(tx_data, rx_data, data_len);
+	if (ret) {
+		LPERROR("Received data verification failed.\n");
+		LPRINTF("Expected:");
+		dump_buffer(tx_data, data_len);
+		LPRINTF("Actual:");
+		dump_buffer(rx_data, data_len);
+	} else {
+		LPRINTF("Message Received: %s\n",
+			(char *)rx_data + sizeof(*msg_hdr));
+	}
+	/* Notify the remote the demo has finished. */
+	metal_io_write32(shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_IDLE);
+
+out:
+	if (tx_data)
+		metal_free_memory(tx_data);
+	if (rx_data)
+		metal_free_memory(rx_data);
+	LPRINTF("Shared memory demo: %s.\n", ret ? "Failed": "Passed" );
+	return ret;
+}
+
+/* Open the shared memory device, run shmem_echo() on its first i/o
+ * region and close the device again. Returns shmem_echo()'s result, or
+ * the error from opening the device / -ENODEV if it has no region 0. */
+int shmem_demo()
+{
+	struct metal_device *device = NULL;
+	struct metal_io_region *io = NULL;
+	int ret = 0;
+
+	print_demo("shared memory");
+
+	/* Open the shared memory device */
+	ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &device);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", SHM_DEV_NAME);
+		return ret;
+	}
+
+	/* get shared memory device IO region */
+	io = metal_device_io_region(device, 0);
+	if (!io) {
+		LPERROR("Failed to get io region for %s.\n", device->name);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Run the demo */
+	ret = shmem_echo(io);
+
+out:
+	if (device)
+		metal_device_close(device);
+	return ret;
+}
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c
new file mode 100644
index 00000000..3133015a
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved.
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*****************************************************************************
+ * shmem_latency_demo.c
+ * This demo demonstrates the shared mem. latency between the APU and RPU.
+ * This demo does so via the following steps:
+ *
+ * 1. Get the shared memory device I/O region.
+ * 1. Get the TTC timer device I/O region.
+ * 2. Get the IPI device I/O region.
+ * 3. Register IPI interrupt handler.
+ * 4. Write to shared memory to indicate demo starts
+ * 5. Reset the APU to RPU TTC counter, write data to the shared memory, then
+ * kick IPI to notify the remote.
+ * 6. When it receives IPI interrupt, the IPI interrupt handler marks the
+ * remote has kicked.
+ * 7. Accumulate APU to RPU and RPU to APU counter values.
+ * 8. Repeat step 5, 6 and 7 for 1000 times
+ * 9.
Write shared memory to tell the RPU that the demo has finished and kick
+ * IPI to notify.
+ * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common.h"
+
+#define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */
+#define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */
+
+/* TTC input clock and conversion factor used to turn ticks into ns */
+#define TTC_CLK_FREQ_HZ 100000000
+#define NS_PER_SEC 1000000000
+
+/* Shared memory offset */
+#define SHM_DEMO_CNTRL_OFFSET 0x0 /* Shared memory for the demo status */
+#define SHM_BUFF_OFFSET_TX 0x1000 /* Shared memory TX buffer start offset */
+#define SHM_BUFF_OFFSET_RX 0x2000 /* Shared memory RX buffer start offset */
+
+#define DEMO_STATUS_IDLE 0x0
+#define DEMO_STATUS_START 0x1 /* Status value to indicate demo start */
+
+/* Number of round trips measured for every package size */
+#define ITERATIONS 1000
+
+/* Size of the local staging buffer, and the smallest/largest package
+ * size; the measurement doubles the size from MIN up to MAX */
+#define BUF_SIZE_MAX 4096
+#define PKG_SIZE_MIN 16
+#define PKG_SIZE_MAX 1024
+
+struct channel_s {
+	struct metal_device *shm_dev; /* Shared memory metal device */
+	struct metal_io_region *shm_io; /* Shared memory metal i/o region */
+	struct metal_device *ttc_dev; /* TTC metal device */
+	struct metal_io_region *ttc_io; /* TTC metal i/o region */
+	atomic_flag remote_nkicked; /* 0 - kicked from remote */
+};
+
+/* Header placed at the start of every package written to shared memory */
+struct msg_hdr_s {
+	uint32_t index; /* iteration number of the package */
+	uint32_t len; /* payload length, i.e. package size minus header */
+};
+
+/**
+ * @brief read_timer() - return TTC counter value
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter ID
+ *
+ * @return - 32-bit value read from the counter value register of cnt_id
+ */
+static inline uint32_t read_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	unsigned long offset = XTTCPS_CNT_VAL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	return metal_io_read32(ttc_io, offset);
+}
+
+/**
+ * @brief reset_timer() - function to reset TTC counter
+ * Set the RST bit in the Count Control Reg.
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter id
+ */
+static inline void reset_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	uint32_t val;
+	unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	/* The whole control register is overwritten with the RST mask */
+	val = XTTCPS_CNT_CNTRL_RST_MASK;
+	metal_io_write32(ttc_io, offset, val);
+}
+
+/**
+ * @brief stop_timer() - function to stop TTC counter
+ * Set the disable bit in the Count Control Reg.
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter id
+ */
+static inline void stop_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	uint32_t val;
+	unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	/* The whole control register is overwritten with the DIS mask */
+	val = XTTCPS_CNT_CNTRL_DIS_MASK;
+	metal_io_write32(ttc_io, offset, val);
+}
+
+/**
+ * @brief ipi_irq_handler() - IPI interrupt handler
+ * It clears the remote_nkicked flag of the channel to mark that an
+ * IPI interrupt has been received. It does not access the TTC timer.
+ *
+ * @param[in] vect_id - IPI interrupt vector ID (unused)
+ * @param[in/out] priv - communication channel data for this application.
+ *
+ * @return - METAL_IRQ_HANDLED if priv is non-NULL and the flag has been
+ * cleared. It returns METAL_IRQ_NOT_HANDLED if priv is NULL.
+ *
+ */
+static int ipi_irq_handler (int vect_id, void *priv)
+{
+	struct channel_s *ch = (struct channel_s *)priv;
+
+	(void)vect_id;
+
+	if (ch) {
+		atomic_flag_clear(&ch->remote_nkicked);
+		return METAL_IRQ_HANDLED;
+	}
+	return METAL_IRQ_NOT_HANDLED;
+}
+
+/**
+ * @brief measure_shmem_latency() - Measure latency of using shared memory
+ * and IPI with libmetal.
+ * Repeatedly send a message to RPU and then detect IPI from RPU
+ * and measure the latency. Similarly, measure the latency from RPU
+ * to APU. Each iteration, record this latency and after the loop
+ * has finished, report the average latency in nanoseconds.
+ * Notes:
+ * - RPU will repeatedly wait for IPI from APU until APU
+ * notifies remote demo has finished by setting the value in the
+ * shared memory.
+ * - The counter sums are restarted for every package size, so each
+ * printed average covers exactly ITERATIONS round trips.
+ *
+ * @param[in] ch - channel information, which contains the IPI i/o region,
+ * shared memory i/o region and the ttc timer i/o region.
+ * @return - 0 on success, -1 if the buffer allocation or a shared
+ * memory transfer fails.
+ */
+static int measure_shmem_latency(struct channel_s *ch)
+{
+	uint32_t apu_to_rpu_sum, rpu_to_apu_sum;
+	int i;
+	size_t s;
+	struct msg_hdr_s *msg_hdr;
+	void *lbuf;
+	int ret = 0;
+
+	LPRINTF("Starting shared memory latency task\n");
+	/* allocate memory for receiving data */
+	lbuf = metal_allocate_memory(BUF_SIZE_MAX);
+	if (!lbuf) {
+		LPERROR("Failed to allocate memory.\r\n");
+		return -1;
+	}
+	memset(lbuf, 0xA, BUF_SIZE_MAX);
+
+	/* write to shared memory to indicate demo has started */
+	metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START);
+
+	for (s = PKG_SIZE_MIN; s <= PKG_SIZE_MAX; s <<= 1) {
+		/* Start a fresh accumulation for this package size */
+		apu_to_rpu_sum = 0;
+		rpu_to_apu_sum = 0;
+		for (i = 1; i <= ITERATIONS; i++) {
+			/* Reset TTC counter */
+			reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+			/* prepare data */
+			msg_hdr = lbuf;
+			msg_hdr->index = i;
+			msg_hdr->len = s - sizeof(*msg_hdr);
+			/* Copy data to the shared memory */
+			ret = metal_io_block_write(ch->shm_io,
+					SHM_BUFF_OFFSET_TX, lbuf, s);
+			if ((size_t)ret != s) {
+				LPERROR("Write shm failure: %zu,%zu\n",
+					s, (size_t)ret);
+				ret = -1;
+				goto out;
+			}
+			/* Kick IPI to notify the remote */
+			kick_ipi(NULL);
+			/* wait until the IPI handler flags the remote kick */
+			wait_for_notified(&ch->remote_nkicked);
+			/* Read message */
+			metal_io_block_read(ch->shm_io,
+					SHM_BUFF_OFFSET_RX,
+					lbuf, s);
+			msg_hdr = lbuf;
+			if (msg_hdr->len != (s - sizeof(*msg_hdr))) {
+				LPERROR("Read shm failure: %zu,%zu\n",
+					s, msg_hdr->len + sizeof(*msg_hdr));
+				ret = -1;
+				goto out;
+			}
+			/* Stop RPU to APU TTC counter */
+			stop_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU);
+
+			apu_to_rpu_sum += read_timer(ch->ttc_io,
+						TTC_CNT_APU_TO_RPU);
+			rpu_to_apu_sum += read_timer(ch->ttc_io,
+						TTC_CNT_RPU_TO_APU);
+		}
+
+		/* report avg latencies; the tick-to-ns conversion is done in
+		 * 64 bits because sum * NS_PER_SEC does not fit in 32 bits */
+		LPRINTF("package size %zu latency result:\n", s);
+		LPRINTF("APU to RPU average latency: %u ns \n",
+			(uint32_t)((uint64_t)apu_to_rpu_sum * NS_PER_SEC /
+				   TTC_CLK_FREQ_HZ / ITERATIONS));
+		LPRINTF("RPU to APU average latency: %u ns \n",
+			(uint32_t)((uint64_t)rpu_to_apu_sum * NS_PER_SEC /
+				   TTC_CLK_FREQ_HZ / ITERATIONS));
+	}
+
+	/* write to shared memory to indicate demo has finished */
+	metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, 0);
+	/* Kick IPI to notify the remote */
+	kick_ipi(NULL);
+
+	LPRINTF("Finished shared memory latency task\n");
+	/* ret still holds the byte count of the last write */
+	ret = 0;
+
+out:
+	metal_free_memory(lbuf);
+	return ret;
+}
+
+/* Open the shared memory and TTC devices, set up the IPI kick handler,
+ * run measure_shmem_latency() and release everything again. Returns 0
+ * on success or the first error encountered. */
+int shmem_latency_demo()
+{
+	struct metal_device *dev;
+	struct metal_io_region *io;
+	struct channel_s ch;
+	int ret = 0;
+
+	print_demo("shared memory latency");
+	memset(&ch, 0, sizeof(ch));
+
+	/* Open shared memory device */
+	ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &dev);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", SHM_DEV_NAME);
+		goto out;
+	}
+	/* Record the device at once so 'out' closes it on later failures */
+	ch.shm_dev = dev;
+
+	/* Get shared memory device IO region */
+	io = metal_device_io_region(dev, 0);
+	if (!io) {
+		LPERROR("Failed to map io region for %s.\n", dev->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	ch.shm_io = io;
+
+	/* Open TTC device */
+	ret = metal_device_open(BUS_NAME, TTC_DEV_NAME, &dev);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", TTC_DEV_NAME);
+		goto out;
+	}
+	ch.ttc_dev = dev;
+
+	/* Get TTC IO region */
+	io = metal_device_io_region(dev, 0);
+	if (!io) {
+		LPERROR("Failed to map io region for %s.\n", dev->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	ch.ttc_io = io;
+
+	/* initialize remote_nkicked: set means "not kicked yet" */
+	ch.remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT;
+	atomic_flag_test_and_set(&ch.remote_nkicked);
+
+	ret = init_ipi();
+	if (ret) {
+		goto out;
+	}
+	ipi_kick_register_handler(ipi_irq_handler, &ch);
+	enable_ipi_kick();
+
+	/* Run the shared memory latency measurement */
+	ret = measure_shmem_latency(&ch);
+
+	/* disable IPI interrupt */
+	disable_ipi_kick();
+	deinit_ipi();
+
+out:
+	if (ch.ttc_dev)
+		metal_device_close(ch.ttc_dev);
+	if (ch.shm_dev)
+		metal_device_close(ch.shm_dev);
+	return ret;
+
+}
+
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c
new file mode 100644
index 00000000..a5ff9482
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved.
+ * Copyright (C) 2022, Advanced Micro Devices, Inc.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*****************************************************************************
+ * shmem_throughput_demo_task.c
+ * This is the remote side of the shared memory throughput demo.
+ * This demo does the following steps:
+ *
+ * 1. Get the shared memory device libmetal I/O region.
+ * 1. Get the TTC timer device libmetal I/O region.
+ * 2. Get IPI device libmetal I/O region and the IPI interrupt vector.
+ * 3. Register IPI interrupt handler.
+ * 6. Upload throughput measurement:
+ * Start TTC APU counter, write data to shared memory and kick IPI to
+ * notify remote. It will iterate for 1000 times, stop TTC APU counter.
+ * Wait for RPU IPI kick to know RPU has finished receiving packages
+ * and RPU TX counter is ready to read. Read the APU TX and RPU RX
+ * counter values and save them. Repeat for different package sizes.
+ * After this measurement, kick IPI to notify the remote, the
+ * measurement has finished.
+ * 7. Download throughput measurement:
+ * Start TTC APU counter, wait for IPI kick, check if data is available,
+ * if yes, read as much data as possible from shared memory. It
+ * iterates until 1000 packages have been received, stop TTC APU counter.
+ * Wait for RPU IPI kick so that APU can get the TTC RPU TX counter
+ * value. Kick IPI to notify the remote it has read the TTC counter.
+ * Repeat for different package size.
+ * 8. Cleanup resource:
+ * disable IPI interrupt and deregister the IPI interrupt handler.
+ *
+ * Here is the Shared memory structure of this demo:
+ * |0x0 - 0x03 | number of APU to RPU buffers available to RPU |
+ * |0x04 - 0x1FFFFF | address array for shared buffers from APU to RPU |
+ * |0x200000 - 0x200003 | number of RPU to APU buffers available to APU |
+ * |0x200004 - 0x3FFFFF | address array for shared buffers from RPU to APU |
+ * |0x400000 - 0x7FFFFF | APU to RPU buffers |
+ * |0x800000 - 0xAFFFFF | RPU to APU buffers |
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common.h"
+
+#define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */
+#define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */
+
+#define TTC_CLK_FREQ_HZ 100000000
+#define NS_PER_SEC 1000000000
+
+/* Shared memory offsets */
+#define SHM_DESC_OFFSET_TX 0x0
+#define SHM_BUFF_OFFSET_TX 0x400000
+#define SHM_DESC_OFFSET_RX 0x200000
+#define SHM_BUFF_OFFSET_RX 0x800000
+
+/* Shared memory descriptors offset */
+#define SHM_DESC_AVAIL_OFFSET 0x00
+#define SHM_DESC_ADDR_ARRAY_OFFSET 0x04
+
+#define ITERATIONS 1000
+
+#define BUF_SIZE_MAX 4096
+#define PKG_SIZE_MAX 1024
+#define PKG_SIZE_MIN 16
+/* Bytes moved per package size; the measurement sends or receives
+ * TOTAL_DATA_SIZE / package-size packages */
+#define TOTAL_DATA_SIZE (1024 * 4096)
+
+#define MB (1024 * 1024) /* Mega Bytes */
+
+struct channel_s {
+	struct metal_device *shm_dev; /* Shared memory metal device */
+	struct metal_io_region *shm_io; /* Shared memory metal i/o region */
+	struct metal_device *ttc_dev; /* TTC metal device */
+	struct metal_io_region *ttc_io; /* TTC metal i/o region */
+	atomic_flag remote_nkicked; /* 0 - kicked from remote */
+};
+
+/**
+ * @brief read_timer() - return TTC counter value
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter ID
+ *
+ * @return - 32-bit value read from the counter value register of cnt_id
+ */
+static inline uint32_t read_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	unsigned long offset = XTTCPS_CNT_VAL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	return metal_io_read32(ttc_io, offset);
+}
+
+/**
+ * @brief reset_timer() - function to reset TTC counter
+ * Set the RST bit in the Count Control Reg.
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter id
+ */
+static inline void reset_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	uint32_t val;
+	unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	/* The whole control register is overwritten with the RST mask */
+	val = XTTCPS_CNT_CNTRL_RST_MASK;
+	metal_io_write32(ttc_io, offset, val);
+}
+
+/**
+ * @brief stop_timer() - function to stop TTC counter
+ * Set the disable bit in the Count Control Reg.
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter id
+ */
+static inline void stop_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	uint32_t val;
+	unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	/* The whole control register is overwritten with the DIS mask */
+	val = XTTCPS_CNT_CNTRL_DIS_MASK;
+	metal_io_write32(ttc_io, offset, val);
+}
+
+/**
+ * @brief ipi_irq_handler() - IPI interrupt handler
+ * It clears the remote_nkicked flag of the channel to mark that an
+ * IPI interrupt has been received. It does not access the TTC timer.
+ *
+ * @param[in] vect_id - IPI interrupt vector ID (unused)
+ * @param[in/out] priv - communication channel data for this application.
+ *
+ * @return - METAL_IRQ_HANDLED if priv is non-NULL and the flag has been
+ * cleared. It returns METAL_IRQ_NOT_HANDLED if priv is NULL.
+ *
+ */
+static int ipi_irq_handler (int vect_id, void *priv)
+{
+	struct channel_s *ch = (struct channel_s *)priv;
+
+	(void)vect_id;
+
+	if (ch) {
+		atomic_flag_clear(&ch->remote_nkicked);
+		return METAL_IRQ_HANDLED;
+	}
+	return METAL_IRQ_NOT_HANDLED;
+}
+
+/**
+ * @brief measure_shmem_throughput() - Show throughput of using shared memory.
+ * - Upload throughput measurement:
+ * Start TTC APU counter, write data to shared memory and kick IPI to
+ * notify remote. It will iterate for 1000 times, then stop the TTC APU
+ * counter. Wait for RPU IPI kick to know RPU has finished receiving
+ * packages and RPU TX counter is ready to read. Read the APU TX and
+ * RPU RX counter values and save them. Repeat for different package
+ * sizes. After this measurement, kick IPI to notify the remote that the
+ * measurement has finished.
+ * - Download throughput measurement:
+ * Start TTC APU counter, wait for IPI kick, check if data is
+ * available, if yes, read as much data as possible from shared
+ * memory. It iterates until 1000 packages have been received, then
+ * stops the TTC APU counter. Wait for RPU IPI kick so that APU can get
+ * the TTC RPU TX counter value. Kick IPI to notify the remote it
+ * has read the TTC counter. Repeat for different package sizes.
+ *
+ * @param[in] ch - channel information, which contains the IPI i/o region,
+ * shared memory i/o region and the ttc timer i/o region.
+ * @return - 0 on success, error code if failure.
+ */
+static int measure_shmem_throughput(struct channel_s* ch)
+{
+	void *lbuf = NULL;
+	int ret = 0;
+	size_t s, i;
+	uint32_t rx_count, rx_avail, tx_count, iterations;
+	unsigned long tx_avail_offset, rx_avail_offset;
+	unsigned long tx_addr_offset, rx_addr_offset;
+	unsigned long tx_data_offset, rx_data_offset;
+	uint32_t buf_phy_addr_32;
+	uint32_t *apu_tx_count = NULL;
+	uint32_t *apu_rx_count = NULL;
+	uint32_t *rpu_tx_count = NULL;
+	uint32_t *rpu_rx_count = NULL;
+	/* Every package size moves TOTAL_DATA_SIZE bytes, so the rate is
+	 * TOTAL_DATA_SIZE * clock / ticks; 64 bits keep the product exact */
+	const uint64_t bytes_x_hz = (uint64_t)TOTAL_DATA_SIZE * TTC_CLK_FREQ_HZ;
+
+	/* allocate memory for receiving data */
+	lbuf = metal_allocate_memory(BUF_SIZE_MAX);
+	if (!lbuf) {
+		LPERROR("Failed to allocate memory.\r\n");
+		return -ENOMEM;
+	}
+	memset(lbuf, 0xA, BUF_SIZE_MAX);
+
+	/* allocate memory for saving counter values; the empty loop only
+	 * counts how many package sizes will be measured */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<=1, i++);
+	apu_tx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	apu_rx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	rpu_tx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	rpu_rx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	if (!apu_tx_count || !apu_rx_count || !rpu_tx_count || !rpu_rx_count) {
+		LPERROR("Failed to allocate memory.\r\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Clear shared memory */
+	metal_io_block_set(ch->shm_io, 0, 0, metal_io_region_size(ch->shm_io));
+
+	LPRINTF("Starting shared mem throughput demo\n");
+
+	/* for each data size, measure send throughput */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) {
+		tx_count = 0;
+		iterations = TOTAL_DATA_SIZE / s;
+		/* Set tx buffer address offset */
+		tx_avail_offset = SHM_DESC_OFFSET_TX + SHM_DESC_AVAIL_OFFSET;
+		tx_addr_offset = SHM_DESC_OFFSET_TX +
+				SHM_DESC_ADDR_ARRAY_OFFSET;
+		tx_data_offset = SHM_DESC_OFFSET_TX + SHM_BUFF_OFFSET_TX;
+		/* Reset APU TTC counter */
+		reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		while (tx_count < iterations) {
+			/* Write data to the shared memory*/
+			metal_io_block_write(ch->shm_io, tx_data_offset,
+					lbuf, s);
+
+			/* Write to the address array to tell the other end
+			 * the buffer address.
+			 */
+			buf_phy_addr_32 = (uint32_t)metal_io_phys(ch->shm_io,
+							tx_data_offset);
+			metal_io_write32(ch->shm_io, tx_addr_offset,
+					buf_phy_addr_32);
+			tx_data_offset += s;
+			tx_addr_offset += sizeof(buf_phy_addr_32);
+
+			/* Increase number of available buffers */
+			tx_count++;
+			metal_io_write32(ch->shm_io, tx_avail_offset,
+					tx_count);
+			/* Kick IPI to notify RPU data is ready in
+			 * the shared memory */
+			kick_ipi(NULL);
+		}
+		/* Stop APU TTC counter */
+		stop_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		/* Wait for RPU to signal RPU RX TTC counter is ready to
+		 * read */
+		wait_for_notified(&ch->remote_nkicked);
+		/* Read TTC counter values */
+		apu_tx_count[i] = read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		rpu_rx_count[i] = read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU);
+	}
+
+	/* Kick IPI to notify RPU that APU has read the RPU RX TTC counter
+	 * value */
+	kick_ipi(NULL);
+
+	/* for each data size, measure block read throughput */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) {
+		rx_count = 0;
+		iterations = TOTAL_DATA_SIZE / s;
+		/* Set rx buffer address offset */
+		rx_avail_offset = SHM_DESC_OFFSET_RX + SHM_DESC_AVAIL_OFFSET;
+		rx_addr_offset = SHM_DESC_OFFSET_RX +
+				SHM_DESC_ADDR_ARRAY_OFFSET;
+		rx_data_offset = SHM_DESC_OFFSET_RX + SHM_BUFF_OFFSET_RX;
+
+		wait_for_notified(&ch->remote_nkicked);
+		/* Data has arrived, measurement starts. Reset APU TTC
+		 * counter */
+		reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		while (1) {
+			rx_avail = metal_io_read32(ch->shm_io, rx_avail_offset);
+			while(rx_count != rx_avail) {
+				/* Get the buffer location from the shared
+				 * memory rx address array.
+				 */
+				buf_phy_addr_32 = metal_io_read32(ch->shm_io,
+							rx_addr_offset);
+				rx_data_offset = metal_io_phys_to_offset(
+					ch->shm_io,
+					(metal_phys_addr_t)buf_phy_addr_32);
+				if (rx_data_offset == METAL_BAD_OFFSET) {
+					LPERROR(
+					"[%u]failed to get rx offset: 0x%x, 0x%lx.\n",
+						rx_count, buf_phy_addr_32,
+						metal_io_phys(ch->shm_io,
+							rx_addr_offset));
+					ret = -EINVAL;
+					goto out;
+				}
+				rx_addr_offset += sizeof(buf_phy_addr_32);
+				/* Read data from shared memory */
+				metal_io_block_read(ch->shm_io, rx_data_offset,
+						lbuf, s);
+				rx_count++;
+			}
+			if (rx_count < iterations)
+				/* Need to wait for more data */
+				wait_for_notified(&ch->remote_nkicked);
+			else
+				break;
+		}
+		/* Stop APU TTC counter */
+		stop_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		/* Re-arm the remote kicked flag -- 0 is kicked, so leave it
+		 * set before asking the remote for its counter */
+		atomic_flag_clear(&ch->remote_nkicked);
+		atomic_flag_test_and_set(&ch->remote_nkicked);
+		/* Kick IPI to notify remote it is ready to read data */
+		kick_ipi(NULL);
+		/* Wait for RPU to signal RPU TX TTC counter is ready to
+		 * read */
+		wait_for_notified(&ch->remote_nkicked);
+		/* Read TTC counter values */
+		apu_rx_count[i] = read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		rpu_tx_count[i] = read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU);
+		/* Kick IPI to notify RPU APU has read the RPU TX TTC counter
+		 * value */
+		kick_ipi(NULL);
+	}
+
+	/* Print the measurement result. The byte total is the same for
+	 * every package size; it must not be derived from 'iterations',
+	 * which still holds the value of the last package size. */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) {
+		const uint32_t ticks[4] = {
+			apu_tx_count[i], apu_rx_count[i],
+			rpu_tx_count[i], rpu_rx_count[i],
+		};
+		unsigned long mbps[4];
+		size_t k;
+
+		/* A counter that reads 0 is reported as 0 MB/s instead of
+		 * dividing by zero */
+		for (k = 0; k < 4; k++)
+			mbps[k] = ticks[k] ?
+				(unsigned long)(bytes_x_hz / ticks[k] / MB) : 0;
+
+		LPRINTF("Shared memory throughput of pkg size %lu : \n",
+			(unsigned long)s);
+		LPRINTF(" APU send: %x, %lu MB/s\n", ticks[0], mbps[0]);
+		LPRINTF(" APU receive: %x, %lu MB/s\n", ticks[1], mbps[1]);
+		LPRINTF(" RPU send: %x, %lu MB/s\n", ticks[2], mbps[2]);
+		LPRINTF(" RPU receive: %x, %lu MB/s\n", ticks[3], mbps[3]);
+	}
+
+	LPRINTF("Finished shared memory throughput\n");
+
+out:
+	if (lbuf)
+		metal_free_memory(lbuf);
+	if (apu_tx_count)
+		metal_free_memory(apu_tx_count);
+	if (apu_rx_count)
+		metal_free_memory(apu_rx_count);
+	if (rpu_tx_count)
+		metal_free_memory(rpu_tx_count);
+	if (rpu_rx_count)
+		metal_free_memory(rpu_rx_count);
+	return ret;
+}
+
+/* Open the shared memory and TTC devices, set up the IPI kick handler,
+ * run measure_shmem_throughput() and release everything again. Returns
+ * 0 on success or the first error encountered. */
+int shmem_throughput_demo()
+{
+	struct metal_device *dev;
+	struct metal_io_region *io;
+	struct channel_s ch;
+	int ret = 0;
+
+	print_demo("shared memory throughput");
+	memset(&ch, 0, sizeof(ch));
+
+	/* Open shared memory device */
+	ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &dev);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", SHM_DEV_NAME);
+		goto out;
+	}
+	/* Record the device at once so 'out' closes it on later failures */
+	ch.shm_dev = dev;
+
+	/* Get shared memory device IO region */
+	io = metal_device_io_region(dev, 0);
+	if (!io) {
+		LPERROR("Failed to map io region for %s.\n", dev->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	ch.shm_io = io;
+
+	/* Open TTC device */
+	ret = metal_device_open(BUS_NAME, TTC_DEV_NAME, &dev);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", TTC_DEV_NAME);
+		goto out;
+	}
+	ch.ttc_dev = dev;
+
+	/* Get TTC IO region */
+	io = metal_device_io_region(dev, 0);
+	if (!io) {
+		LPERROR("Failed to map io region for %s.\n", dev->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	ch.ttc_io = io;
+
+	/* initialize remote_nkicked: set means "not kicked yet" */
+	ch.remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT;
+	atomic_flag_test_and_set(&ch.remote_nkicked);
+
+	ret = init_ipi();
+	if (ret) {
+		goto out;
+	}
+	ipi_kick_register_handler(ipi_irq_handler, &ch);
+	enable_ipi_kick();
+
+	/* Run the shared memory throughput measurement */
+	ret = measure_shmem_throughput(&ch);
+
+	/* disable IPI interrupt */
+	disable_ipi_kick();
+	deinit_ipi();
+
+out:
+	if (ch.ttc_dev)
+		metal_device_close(ch.ttc_dev);
+	if (ch.shm_dev)
+		metal_device_close(ch.shm_dev);
+	return ret;
+
+}
+
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c
new file mode 100644
index
00000000..5b4e555f
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c
@@ -0,0 +1,31 @@
+
+/*
+ * Copyright (c) 2016, Xilinx Inc. and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+#include
+#include "common.h"
+
+/* Initialize libmetal with METAL_INIT_DEFAULTS. Logs an error on failure
+ * and returns whatever metal_init() returned (0 on success). */
+int sys_init()
+{
+	struct metal_init_params init_param = METAL_INIT_DEFAULTS;
+	int ret;
+
+	ret = metal_init(&init_param);
+	if (ret)
+		LPERROR("Failed to initialize libmetal\n");
+	return ret;
+}
+
+/* Counterpart of sys_init(): shuts libmetal down via metal_finish(). */
+void sys_cleanup()
+{
+	metal_finish();
+}
+
+/* No-op in this implementation: returns immediately without waiting. */
+void wait_for_interrupt(void) {
+	return;
+}
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h
new file mode 100644
index 00000000..67d4cfb6
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h
@@ -0,0 +1,16 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2017 Xilinx, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ ******************************************************************************/
+
+#ifndef __SYS_INIT_H__
+#define __SYS_INIT_H__
+
+int sys_init();
+void sys_cleanup();
+
+#endif /* __SYS_INIT_H__ */
+
From c2533b4ec605e223d16308e258d19d30f1bb9479 Mon Sep 17 00:00:00 2001
From: Ben Levinsky
Date: Wed, 27 Mar 2019 14:23:15 -0700
Subject: [PATCH 04/24] examples: linux: libmetal_amp_demod: add macro guards for IPI

Signed-off-by: Ben Levinsky
---
 .../system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c
index 653ecac9..0eade5c2 100644
--- a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c
@@ -25,9 +25,13 @@
 #define IPI_IER_OFFSET 0x18
 #define IPI_IDR_OFFSET
0x1C +#ifndef IPI_MASK #define IPI_MASK 0x1000000 +#endif /* IPI_MASK */ +#ifndef IPI_DEV_NAME #define IPI_DEV_NAME "ff310000.ipi" +#endif /* IPI_DEV_NAME */ #define SHM0_DESC_DEV_NAME "3ed00000.shm_desc" #define SHM1_DESC_DEV_NAME "3ed10000.shm_desc" #define SHM_DEV_NAME "3ed20000.shm" From 9f4a838501741bcfefb70fad022e88df73160bf4 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 27 Mar 2019 14:23:44 -0700 Subject: [PATCH 05/24] examples: linux: zynqmp_r5: dynamically set TTC and IPI vars Previously, TTC and IPI variables within libmetal demo for linux were hardcoded for zynqmp SoC. Now TTC and IPI are set based on whether target is zynqmp or versal. Signed-off-by: Ben Levinsky --- .../system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt | 10 ++++++++++ examples/system/linux/zynqmp/zynqmp_amp_demo/common.h | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt index aa94d8f9..da3bd3df 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt @@ -70,3 +70,13 @@ foreach (_app libmetal_amp_mb_shmem) endif (WITH_STATIC_LIB) endforeach (_app) +if (SOC_FAMILY STREQUAL "zynqmp") + add_definitions(-DCONFIG_IPI_DEV_NAME="ff340000.ipi") + add_definitions(-DCONFIG_TTC_DEV_NAME="ff110000.timer") + add_definitions(-DCONFIG_IPI_MASK=0x100) +elseif (SOC_FAMILY STREQUAL "versal") + add_definitions(-DCONFIG_IPI_DEV_NAME="ff360000.ipi") + add_definitions(-DCONFIG_TTC_DEV_NAME="ff0e0000.ttc0") + add_definitions(-DCONFIG_IPI_MASK=0x08) +endif (SOC_FAMILY STREQUAL "zynqmp") + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h index 26e72dc4..862b42f0 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -15,9 +15,9 @@ 
#include #define BUS_NAME "platform" -#define IPI_DEV_NAME "ff340000.ipi" +#define IPI_DEV_NAME CONFIG_IPI_DEV_NAME #define SHM_DEV_NAME "3ed80000.shm" -#define TTC_DEV_NAME "ff110000.timer" +#define TTC_DEV_NAME CONFIG_TTC_DEV_NAME /* Apply this snippet to the device tree in an overlay so that * Linux userspace can see and use TTC0: @@ -36,7 +36,7 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK 0x100 /* IPI mask for kick from RPU. */ +#define IPI_MASK CONFIG_IPI_MASK /* IPI mask for kick from RPU. */ /* TTC counter offsets */ #define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ From 6c174a7c6b24e7816028b6750e052b3c971bb50e Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 27 Mar 2019 14:24:09 -0700 Subject: [PATCH 06/24] examples: generic: zynqmp_r5: dynamically set TTC and IPI vars Previously, TTC and IPI variables within libmetal demo for baremetal were hardcoded for zynqmp SoC. Now TTC and IPI are set based on whether target is zynqmp or versal. 
Signed-off-by: Ben Levinsky --- .../system/generic/xlnx_r5/amp_demo/CMakeLists.txt | 13 +++++++++++++ examples/system/generic/xlnx_r5/amp_demo/common.h | 6 +++--- examples/system/generic/xlnx_r5/amp_demo/sys_init.c | 6 +++--- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt index 2389c7ea..2ab94a6f 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt @@ -26,3 +26,16 @@ target_compile_options (${_app0}.elf PUBLIC ${_ec_flgs}) target_link_libraries(${_app0}.elf -Wl,-Map=${_app0}.map -Wl,--gc-sections -T\"${_linker_script}\" -Wl,--start-group ${_deps} -Wl,--end-group) install (TARGETS ${_app0}.elf RUNTIME DESTINATION bin) +if (SOC_FAMILY STREQUAL "zynqmp") + add_definitions(-DCONFIG_TTC0_BASE_ADDR=XPAR_PSU_TTC_0_BASEADDR) + add_definitions(-DCONFIG_IPI_BASE_ADDR=XPAR_PSU_IPI_1_S_AXI_BASEADDR) + add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=65 -DCONFIG_IPI_MASK=0x1000000) + add_definitions(-DCONFIG_TTC_DEV_NAME="ff110000.ttc") + add_definitions(-DCONFIG_IPI_DEV_NAME="ff310000.ipi") +elseif (SOC_FAMILY STREQUAL "versal") + add_definitions(-DCONFIG_TTC0_BASE_ADDR=0xFF0E0000) + add_definitions(-DCONFIG_IPI_BASE_ADDR=0xFF340000) + add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=63 -DCONFIG_IPI_MASK=0x0000020) + add_definitions(-DCONFIG_TTC_DEV_NAME="ff0e0000.ttc") + add_definitions(-DCONFIG_IPI_DEV_NAME="ff340000.ipi") +endif (SOC_FAMILY STREQUAL "zynqmp") diff --git a/examples/system/generic/xlnx_r5/amp_demo/common.h b/examples/system/generic/xlnx_r5/amp_demo/common.h index ece552c1..e1b33b7a 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/common.h +++ b/examples/system/generic/xlnx_r5/amp_demo/common.h @@ -26,9 +26,9 @@ /* Devices names */ #define BUS_NAME "generic" -#define IPI_DEV_NAME "ff310000.ipi" +#define IPI_DEV_NAME CONFIG_IPI_DEV_NAME #define SHM_DEV_NAME 
"3ed80000.shm" -#define TTC_DEV_NAME "ff110000.ttc" +#define TTC_DEV_NAME CONFIG_TTC_DEV_NAME /* IPI registers offset */ #define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ @@ -38,7 +38,7 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK 0x1000000 /* IPI mask for kick from APU. +#define IPI_MASK CONFIG_IPI_MASK /* IPI mask for kick from APU. We use PL0 IPI in this demo. */ /* TTC counter offsets */ diff --git a/examples/system/generic/xlnx_r5/amp_demo/sys_init.c b/examples/system/generic/xlnx_r5/amp_demo/sys_init.c index 0915ebdc..57ac3316 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/generic/xlnx_r5/amp_demo/sys_init.c @@ -29,11 +29,11 @@ #define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID -#define IPI_IRQ_VECT_ID 65 +#define IPI_IRQ_VECT_ID CONFIG_IPI_IRQ_VECT_ID #define SHM_BASE_ADDR 0x3ED80000 -#define TTC0_BASE_ADDR 0xFF110000 -#define IPI_BASE_ADDR 0xFF310000 +#define TTC0_BASE_ADDR CONFIG_TTC0_BASE_ADDR +#define IPI_BASE_ADDR CONFIG_IPI_BASE_ADDR /* Default generic I/O region page shift */ /* Each I/O region can contain multiple pages. From 1b5b5a5cb50e56104a376d29f4b4188a3c41a386 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 27 Mar 2019 14:24:23 -0700 Subject: [PATCH 07/24] examples: freertos: zynqmp_r5: dynamically set TTC and IPI vars Previously, TTC and IPI variables within libmetal demo for FreeRTOS were hardcoded for zynqmp SoC. Now TTC and IPI are set based on whether target is zynqmp or versal. 
Signed-off-by: Ben Levinsky --- .../system/freertos/xlnx_r5/amp_demo/CMakeLists.txt | 13 +++++++++++++ examples/system/freertos/xlnx_r5/amp_demo/common.h | 6 +++--- .../system/freertos/xlnx_r5/amp_demo/sys_init.c | 6 +++--- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt index 2389c7ea..c377bfd1 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt @@ -26,3 +26,16 @@ target_compile_options (${_app0}.elf PUBLIC ${_ec_flgs}) target_link_libraries(${_app0}.elf -Wl,-Map=${_app0}.map -Wl,--gc-sections -T\"${_linker_script}\" -Wl,--start-group ${_deps} -Wl,--end-group) install (TARGETS ${_app0}.elf RUNTIME DESTINATION bin) +if (SOC_FAMILY STREQUAL "zynqmp") + add_definitions(-DCONFIG_TTC0_BASE_ADDR=XPAR_PSU_TTC_0_BASEADDR) + add_definitions(-DCONFIG_IPI_BASE_ADDR=XPAR_PSU_IPI_1_S_AXI_BASEADDR) + add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=65 -DCONFIG_IPI_MASK=0x1000000) + add_definitions(-DCONFIG_TTC_DEV_NAME="ff110000.ttc") + add_definitions(-DCONFIG_IPI_DEV_NAME="ff310000.ipi") +elseif (SOC_FAMILY STREQUAL "versal") + add_definitions(-DCONFIG_TTC0_BASE_ADDR=0xFF0E0000) + add_definitions(-DCONFIG_IPI_BASE_ADDR=0xFF340000) + add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=63 -DCONFIG_IPI_MASK=0x0000020) + add_definitions(-DCONFIG_TTC_DEV_NAME="ff0e0000.ttc") + add_definitions(-DCONFIG_IPI_DEV_NAME="ff340000.ipi") +endif (SOC_FAMILY STREQUAL "zynqmp") diff --git a/examples/system/freertos/xlnx_r5/amp_demo/common.h b/examples/system/freertos/xlnx_r5/amp_demo/common.h index d6e29c7a..1f921947 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/common.h +++ b/examples/system/freertos/xlnx_r5/amp_demo/common.h @@ -24,9 +24,9 @@ /* Devices names */ #define BUS_NAME "generic" -#define IPI_DEV_NAME "ff310000.ipi" +#define IPI_DEV_NAME CONFIG_IPI_DEV_NAME #define SHM_DEV_NAME 
"3ed80000.shm" -#define TTC_DEV_NAME "ff110000.ttc" +#define TTC_DEV_NAME CONFIG_TTC_DEV_NAME /* IPI registers offset */ #define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ @@ -36,7 +36,7 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK 0x1000000 /* IPI mask for kick from APU. +#define IPI_MASK CONFIG_IPI_MASK /* IPI mask for kick from APU. We use PL0 IPI in this demo. */ /* TTC counter offsets */ diff --git a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c index 347332d9..198c7c6e 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c @@ -29,11 +29,11 @@ #define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID -#define IPI_IRQ_VECT_ID 65 +#define IPI_IRQ_VECT_ID CONFIG_IPI_IRQ_VECT_ID #define SHM_BASE_ADDR 0x3ED80000 -#define TTC0_BASE_ADDR 0xFF110000 -#define IPI_BASE_ADDR 0xFF310000 +#define TTC0_BASE_ADDR CONFIG_TTC0_BASE_ADDR +#define IPI_BASE_ADDR CONFIG_IPI_BASE_ADDR /* Default generic I/O region page shift */ /* Each I/O region can contain multiple pages. From 6b8208533e443768c959d9a2a1cd6145489473fa Mon Sep 17 00:00:00 2001 From: Sergei Korneichuk Date: Sun, 4 Oct 2020 16:42:02 -0700 Subject: [PATCH 08/24] libmetal: amp_demo: improve performance measurement Correct integer arithmetic errors in computing average latency. Collect and show min, max latency in addition to the average. Use floating point to compute and show throughput. 
Signed-off-by: Sergei Korneichuk Acked-by: ben Levinsky --- .../linux/zynqmp/zynqmp_amp_demo/common.h | 27 ++++++++++ .../zynqmp/zynqmp_amp_demo/ipi_latency_demo.c | 51 +++++++++++++++---- .../zynqmp_amp_demo/shmem_latency_demo.c | 33 +++++++----- .../zynqmp_amp_demo/shmem_throughput_demo.c | 17 ++++--- 4 files changed, 97 insertions(+), 31 deletions(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h index 862b42f0..bfe91cc8 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -231,4 +231,31 @@ void disable_ipi_kick(void); */ void enable_ipi_kick(void); +/** + * basic statistics: sample count, sum, minimum and maximum (start from STAT_INIT; st_min begins at all-ones so the first sample always replaces it) + */ +struct metal_stat { + uint64_t st_cnt; + uint64_t st_sum; + uint64_t st_min; + uint64_t st_max; +}; +#define STAT_INIT { .st_cnt = 0, .st_sum = 0, .st_min = ~0ULL, .st_max = 0, } + +/** + * @brief update_stat() - update basic statistics: count the sample, add it to the sum and track the minimum and maximum + * + * @param[in] pst - pointer to the struct metal_stat to update + * @param[in] val - the value for the update + */ +static inline void update_stat(struct metal_stat *pst, uint64_t val) +{ + pst->st_cnt++; + pst->st_sum += val; + if (pst->st_min > val) + pst->st_min = val; + if (pst->st_max < val) + pst->st_max = val; +} + #endif /* __COMMON_H__ */ diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c index 53efaaba..1f7f45cf 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c @@ -34,13 +34,15 @@ #include #include #include +#include #include "common.h" #define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */ #define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */ #define TTC_CLK_FREQ_HZ 100000000 -#define NS_PER_SEC 1000000000 +#define NS_PER_SEC 1000000000 +#define NS_PER_TTC_TICK (NS_PER_SEC / TTC_CLK_FREQ_HZ) /* Shared
memory offset */ #define SHM_DEMO_CNTRL_OFFSET 0x0 @@ -138,6 +140,27 @@ static int ipi_irq_handler (int vect_id, void *priv) return METAL_IRQ_NOT_HANDLED; } +/** + * @brief ttc_vs_clock_gettime() sanity check: TTC and CLOCK_MONOTONIC + * Compare TTC counts with the CLOCK_MONOTONIC over sleep(1). + * They should be very close, e.g. within 6 us for 100 MHz TTC + * + * @param[in] ch - channel information for the ttc timer + */ + +static void ttc_vs_clock_gettime(struct channel_s *ch) +{ + uint64_t ttc, lnx = metal_get_timestamp(); + + reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + sleep(1); + stop_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + lnx = metal_get_timestamp() - lnx; + ttc = NS_PER_TTC_TICK * read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + LPRINTF("sleep(1) check: TTC= %lu / CLOCK_MONOTONIC= %lu = %.2f\n", + ttc, lnx, lnx ? (ttc/(float)lnx) : 0); +} + /** * @brief measure_ipi_latency() - Measure latency of IPI * Repeatedly kick IPI to notify the remote and then wait for IPI kick @@ -155,13 +178,17 @@ static int ipi_irq_handler (int vect_id, void *priv) */ static int measure_ipi_latency(struct channel_s *ch) { - uint32_t apu_to_rpu_sum = 0, rpu_to_apu_sum = 0; + struct metal_stat a2r = STAT_INIT; + struct metal_stat r2a = STAT_INIT; + uint64_t delta_ns; int i; LPRINTF("Starting IPI latency task\n"); + ttc_vs_clock_gettime(ch); /* write to shared memory to indicate demo has started */ metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START); + delta_ns = metal_get_timestamp(); for ( i = 1; i <= ITERATIONS; i++) { /* Reset TTC counter */ reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); @@ -170,9 +197,10 @@ static int measure_ipi_latency(struct channel_s *ch) /* irq handler stops timer for rpu->apu irq */ wait_for_notified(&ch->remote_nkicked); - apu_to_rpu_sum += read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); - rpu_to_apu_sum += read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU); + update_stat(&a2r, read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU)); + update_stat(&r2a, 
read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU)); } + delta_ns = metal_get_timestamp() - delta_ns; /* write to shared memory to indicate demo has finished */ metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, 0); @@ -180,11 +208,16 @@ static int measure_ipi_latency(struct channel_s *ch) kick_ipi(NULL); /* report avg latencies */ - LPRINTF("IPI latency result with %i iterations:\n", ITERATIONS); - LPRINTF("APU to RPU average latency: %u ns \n", - apu_to_rpu_sum / ITERATIONS * NS_PER_SEC / TTC_CLK_FREQ_HZ ); - LPRINTF("RPU to APU average latency: %u ns \n", - rpu_to_apu_sum / ITERATIONS * NS_PER_SEC / TTC_CLK_FREQ_HZ ); + LPRINTF("IPI latency: %i iterations took %lu ns (CLOCK_MONOTONIC)\n", + ITERATIONS, delta_ns); + LPRINTF("TTC [min,max] are in TTC ticks: %d ns per tick\n", + NS_PER_TTC_TICK); + LPRINTF("APU to RPU: [%lu, %lu] avg: %lu ns\n", + a2r.st_min, a2r.st_max, + a2r.st_sum * NS_PER_TTC_TICK / ITERATIONS); + LPRINTF("RPU to APU: [%lu, %lu] avg: %lu ns\n", + r2a.st_min, r2a.st_max, + r2a.st_sum * NS_PER_TTC_TICK / ITERATIONS); LPRINTF("Finished IPI latency task\n"); return 0; } diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c index 3133015a..8374f257 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c @@ -41,7 +41,8 @@ #define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */ #define TTC_CLK_FREQ_HZ 100000000 -#define NS_PER_SEC 1000000000 +#define NS_PER_SEC 1000000000 +#define NS_PER_TTC_TICK (NS_PER_SEC / TTC_CLK_FREQ_HZ) /* Shared memory offset */ #define SHM_DEMO_CNTRL_OFFSET 0x0 /* Shared memory for the demo status */ @@ -166,14 +167,14 @@ static int ipi_irq_handler (int vect_id, void *priv) */ static int measure_shmem_latency(struct channel_s *ch) { - uint32_t apu_to_rpu_sum = 0, rpu_to_apu_sum = 0; - int i; size_t s; struct msg_hdr_s *msg_hdr; void *lbuf; - 
int ret; + int ret, i; - LPRINTF("Starting IPI latency task\n"); + LPRINTF("Starting shared memory latency task\n\t" + "TTC [min,max] are in TTC ticks: %d ns per tick\n", + NS_PER_TTC_TICK); /* allocate memory for receiving data */ lbuf = metal_allocate_memory(BUF_SIZE_MAX); if (!lbuf) { @@ -186,6 +187,8 @@ static int measure_shmem_latency(struct channel_s *ch) metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START); for (s = PKG_SIZE_MIN; s <= PKG_SIZE_MAX; s <<= 1) { + struct metal_stat a2r = STAT_INIT; + struct metal_stat r2a = STAT_INIT; for (i = 1; i <= ITERATIONS; i++) { /* Reset TTC counter */ reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); @@ -220,18 +223,20 @@ static int measure_shmem_latency(struct channel_s *ch) /* Stop RPU to APU TTC counter */ stop_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU); - apu_to_rpu_sum += read_timer(ch->ttc_io, - TTC_CNT_APU_TO_RPU); - rpu_to_apu_sum += read_timer(ch->ttc_io, - TTC_CNT_RPU_TO_APU); + update_stat(&a2r, read_timer(ch->ttc_io, + TTC_CNT_APU_TO_RPU)); + update_stat(&r2a, read_timer(ch->ttc_io, + TTC_CNT_RPU_TO_APU)); } /* report avg latencies */ - LPRINTF("package size %lu latency result:\n", s); - LPRINTF("APU to RPU average latency: %u ns \n", - apu_to_rpu_sum / ITERATIONS * NS_PER_SEC / TTC_CLK_FREQ_HZ ); - LPRINTF("RPU to APU average latency: %u ns \n", - rpu_to_apu_sum / ITERATIONS * NS_PER_SEC / TTC_CLK_FREQ_HZ ); + LPRINTF("package size %lu latency:\n", s); + LPRINTF(" APU to RPU: [%lu, %lu] avg: %lu ns\n", + a2r.st_min, a2r.st_max, + a2r.st_sum * NS_PER_TTC_TICK / ITERATIONS); + LPRINTF(" RPU to APU: [%lu, %lu] avg: %lu ns\n", + r2a.st_min, r2a.st_max, + r2a.st_sum * NS_PER_TTC_TICK / ITERATIONS); } /* write to shared memory to indicate demo has finished */ diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c index a5ff9482..f2058314 100644 --- 
a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c @@ -335,16 +335,17 @@ static int measure_shmem_throughput(struct channel_s* ch) } /* Print the measurement result */ + float mbs = TTC_CLK_FREQ_HZ * (TOTAL_DATA_SIZE / MB); for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) { LPRINTF("Shared memory throughput of pkg size %lu : \n", s); - LPRINTF(" APU send: %x, %lu MB/s\n", apu_tx_count[i], - s * iterations * TTC_CLK_FREQ_HZ / apu_tx_count[i] / MB); - LPRINTF(" APU receive: %x, %lu MB/s\n", apu_rx_count[i], - s * iterations * TTC_CLK_FREQ_HZ / apu_rx_count[i] / MB); - LPRINTF(" RPU send: %x, %lu MB/s\n", rpu_tx_count[i], - s * iterations * TTC_CLK_FREQ_HZ / rpu_tx_count[i] / MB); - LPRINTF(" RPU receive: %x, %lu MB/s\n", rpu_rx_count[i], - s * iterations * TTC_CLK_FREQ_HZ / rpu_rx_count[i] / MB); + LPRINTF(" APU send: %u, %.1f MB/s\n", apu_tx_count[i], + mbs / apu_tx_count[i]); + LPRINTF(" RPU receive: %u, %.1f MB/s\n", rpu_rx_count[i], + mbs / rpu_rx_count[i]); + LPRINTF(" RPU send: %u, %.1f MB/s\n", rpu_tx_count[i], + mbs / rpu_tx_count[i]); + LPRINTF(" APU receive: %u, %.1f MB/s\n", apu_rx_count[i], + mbs / apu_rx_count[i]); } LPRINTF("Finished shared memory throughput\n"); From 75159e7fd0140f9cbe03696a540f095d874be40b Mon Sep 17 00:00:00 2001 From: Sergei Korneichuk Date: Mon, 9 Aug 2021 22:53:23 -0700 Subject: [PATCH 09/24] examples: linux: add an option to set debug log level Add an option to set METAL_LOG_DEBUG from the command line. 
Signed-off-by: Sergei Korneichuk Acked-for-series: Tanmay Shah --- .../linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c index 6dfaf7e1..d0f9aefd 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c @@ -72,9 +72,9 @@ * Report if any of the above tasks failed. * @return 0 - succeeded, non-zero for failures. */ -int main(void) +int main(int ac, char **av) { - int ret; + int ret, opt; ret = sys_init(); if (ret) { @@ -82,6 +82,11 @@ int main(void) return ret; } + while ((opt = getopt(ac, av, "d")) != -1) { + if (opt == 'd') + metal_set_log_level(METAL_LOG_DEBUG); + } + ret = shmem_demo(); if (ret) { LPERROR("shared memory demo failed.\n"); From c7edeb09ccd357ba072ec5bda3c4fc7a71c72efc Mon Sep 17 00:00:00 2001 From: Sergei Korneichuk Date: Wed, 5 Jan 2022 02:20:08 -0800 Subject: [PATCH 10/24] log: update ML_ERR, ML_INFO, ML_DBG macros Update and keep the original ML_ERR, ML_INFO, ML_DBG. Revert this commit when the OpenAMP code is converted to use the new metal_*() macros. Signed-off-by: Sergei Korneichuk --- lib/log.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/log.h b/lib/log.h index 464624c0..a5e06f12 100644 --- a/lib/log.h +++ b/lib/log.h @@ -105,6 +105,17 @@ void metal_default_log_handler(enum metal_log_level level, #define metal_info(fmt, args...) metal_log(METAL_LOG_INFO, fmt, ##args) #define metal_dbg(fmt, args...) metal_log(METAL_LOG_DEBUG, fmt, ##args) +/** + * Convenience macros ML_ERR, ML_INFO, ML_DBG add source + * function name and the line number before the message. + * Inspired by pr_err, pr_info, etc. in the kernel's printk.h. 
+ * Keep the original ML_ERR, ML_INFO, ML_DBG until the open-amp + * code is converted to use the new metal_*() macros. + */ +#define ML_ERR(fmt, args...) metal_err(fmt, ##args) +#define ML_INFO(fmt, args...) metal_info(fmt, ##args) +#define ML_DBG(fmt, args...) metal_dbg(fmt, ##args) + /** @} */ #ifdef __cplusplus From ed732a9cc52c00e6812fde43eb0e36ac46da5037 Mon Sep 17 00:00:00 2001 From: Sergei Korneichuk Date: Thu, 23 Nov 2023 21:31:42 -0800 Subject: [PATCH 11/24] log: default to function, line number prefix Set WITH_FUNC_LINE_LOG to ON to match the original function, line number prefix in ML_ERR, ML_INFO, ML_DBG macros. Signed-off-by: Sergei Korneichuk --- cmake/options.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/options.cmake b/cmake/options.cmake index 1c776536..7e615a48 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -69,7 +69,7 @@ if (WITH_ZEPHYR) endif (WITH_ZEPHYR) option (WITH_DEFAULT_LOGGER "Build with default logger" ON) -option (WITH_FUNC_LINE_LOG "Log with function name, line number prefix" OFF) +option (WITH_FUNC_LINE_LOG "Log with function name, line number prefix" ON) option (WITH_DOC "Build with documentation" ON) From ee95b7a9e38ef2b7f5e80aaee873f61ec57b3dad Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 6 Jun 2022 14:03:34 -0700 Subject: [PATCH 12/24] test: linux: add test for multiple IO regions Add test to ensure that a linux device with multiple IO regions has each made accessible for both Read and Write and that each has IO regions created. 
Signed-off-by: Ben Levinsky Acked-by: Sergei Korneichuk --- test/system/linux/CMakeLists.txt | 1 + test/system/linux/io.c | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 test/system/linux/io.c diff --git a/test/system/linux/CMakeLists.txt b/test/system/linux/CMakeLists.txt index 1e259d8b..10c866d2 100644 --- a/test/system/linux/CMakeLists.txt +++ b/test/system/linux/CMakeLists.txt @@ -7,6 +7,7 @@ collect (PROJECT_LIB_TESTS threads.c) collect (PROJECT_LIB_TESTS spinlock.c) collect (PROJECT_LIB_TESTS alloc.c) collect (PROJECT_LIB_TESTS irq.c) +collect (PROJECT_LIB_TESTS io.c) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_MACHINE}) add_subdirectory(${PROJECT_MACHINE}) diff --git a/test/system/linux/io.c b/test/system/linux/io.c new file mode 100644 index 00000000..667f8a66 --- /dev/null +++ b/test/system/linux/io.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include +#include "metal-test.h" + +/* + test snippet in device tree amba subnode: + shm0: shm@0 { + compatible = "shm_uio"; + reg = <0x0 0x3ed80000 0x0 0x1000 + 0x0 0x3fd80000 0x0 0x1000>; + }; +*/ + +#define OFFSET 0x10 +#define SHM_DEV "3ed80000.shm" +#define BUS "platform" +#define NUM_REGIONS 2 +#define TESTVAL 0xfeedbeef + + +/* Test for linux device having multiple, accessible IO regions: write a distinct value to each region, then read every region back; returns 0 on success or a negative error code */ +static int io_test(void) { + int ret = 0, i; + struct metal_device *dev; + struct metal_io_region *io[NUM_REGIONS]; + + ret = metal_device_open(BUS, SHM_DEV, &dev); + if (ret) { + printf("Failed to open device %s on bus %s (%d)\n", SHM_DEV, BUS, ret); + goto done; + } + + for (i = 0; i < NUM_REGIONS; ++i) { + io[i] = metal_device_io_region(dev, i); + if (io[i] == NULL) { + printf("Failed to map IO region (%d)\n", i); + ret = -EINVAL; + goto cleanup; + } + + /* write in some test value that differs between each region */ + metal_io_write32(io[i], OFFSET, TESTVAL + i); + } + + for (i = 0; i < NUM_REGIONS; ++i) { + if (metal_io_read32(io[i], OFFSET) != (long unsigned
int)(TESTVAL+i)) { + ret = -EINVAL; + break; + } + } + +cleanup: + metal_device_close(dev); + +done: + return ret; +} +METAL_ADD_TEST(io_test); From 2ac9c17d4c378e2dc2022c40f3395ee3f1157042 Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Mon, 14 Aug 2023 04:45:04 -1200 Subject: [PATCH 13/24] examples: freertos: define xInterruptController in System Device Tree Flow When building with freertos demo in System Device Tree (SDT) workflow, the application fails to build because it is set to extern. This symbol is not found in the SDT BSP. Match generic Libmetal AMP Demo and set this symbol to static to fix compilation issue. Signed-off-by: Ben Levinsky tanmay.shah@amd.com --- examples/system/freertos/xlnx_r5/amp_demo/sys_init.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c index 198c7c6e..a7c4b572 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c @@ -1,6 +1,6 @@ /****************************************************************************** * - * Copyright (C) 2010 - 2017 Xilinx, Inc. All rights reserved. + * Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause * @@ -44,7 +44,12 @@ #define DEFAULT_PAGE_SHIFT (-1UL) #define DEFAULT_PAGE_MASK (-1UL) +#if defined(SDT) +/* System Device Tree (SDT) flow does not provide this symbol.
*/ +static XScuGic xInterruptController; +#else extern XScuGic xInterruptController; +#endif const metal_phys_addr_t metal_phys[] = { IPI_BASE_ADDR, /**< base IPI address */ From 3ef1b92a8d3c352def2262709dfca2b8ee668cd8 Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Tue, 5 Sep 2023 08:13:32 -1200 Subject: [PATCH 14/24] examples: linux: zynqmp: Remove MB definition There is compilation error of the Linux ZynqMP AMP Demo in shmem_throughput_demo.c due to the MB definition being re-defined. The MB symbol is provided in lib/utilities.h as part of commit 7643ddae6ec8 ("lib: utilities: Match upstream for MB/GB macros") Signed-off-by: Ben Levinsky Acked-by: Sergei Korneichuk --- .../linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c index f2058314..073fe379 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. - * Copyright (C) 2022, Advanced Micro Devices, Inc. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -48,6 +48,7 @@ #include #include #include +#include #include "common.h" #define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */ @@ -73,8 +74,6 @@ #define PKG_SIZE_MIN 16 #define TOTAL_DATA_SIZE (1024 * 4096) -#define MB (1024 * 1024) /* Mega Bytes */ - struct channel_s { struct metal_device *shm_dev; /* Shared memory metal device */ struct metal_io_region *shm_io; /* Shared memory metal i/o region */ From d9882123643104afde74ed9f4e20c69a87a88558 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 13 Nov 2024 07:57:34 -0800 Subject: [PATCH 15/24] examples: generic: zynqmp_r5: Add symbols for System Device Tree flow Enable Libmetal AMP Demo build for ZynqMP, Versal and Versal NET SoCs by: 1. Enable CMake build to pass in linker flags. This will enable Xilinx-AMD System Device Tree Flow BSP-based tooling to pass required linker flags 2. Enable System Device Tree flow compliant linker script where many sections are placed in DDR for space and entry point provided by BSP is _vector_table instead of _boot 3. Ensure symbols are present for both classic and System Device Tree flow for each of the above SoCs.
Also improve readability by moving symbol definitions to common.h Signed-off-by: Ben Levinsky Acked-for-series: Sergei Korneichuk --- .../generic/xlnx_r5/amp_demo/CMakeLists.txt | 27 +- .../system/generic/xlnx_r5/amp_demo/common.h | 50 ++- .../generic/xlnx_r5/amp_demo/lscript_sdt.ld | 294 +++++++++++++++++ .../amp_demo/lscript_sdt_versal_net.ld | 295 ++++++++++++++++++ .../generic/xlnx_r5/amp_demo/sys_init.c | 11 +- 5 files changed, 646 insertions(+), 31 deletions(-) create mode 100644 examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld create mode 100644 examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld diff --git a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt index 2ab94a6f..fcccea65 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt @@ -19,6 +19,20 @@ list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demod.c) +include(CheckSymbolExists) +check_symbol_exists(SDT "bspconfig.h" HAS_SYSTEM_DT) +check_symbol_exists(VERSAL_NET "bspconfig.h" IS_VERSAL_NET) +if (HAS_SYSTEM_DT) + if (IS_VERSAL_NET) + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt_versal_net.ld) + else() + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt.ld) + endif(IS_VERSAL_NET) +endif(HAS_SYSTEM_DT) + +if (DEFINED DEMO_LINK_FLAGS) + set (_deps "${_deps} ${DEMO_LINK_FLAGS}") +endif() get_property (_linker_options GLOBAL PROPERTY TEST_LINKER_OPTIONS) add_executable (${_app0}.elf ${_src0}) get_property (_ec_flgs GLOBAL PROPERTY "PROJECT_EC_FLAGS") @@ -26,16 +40,3 @@ target_compile_options (${_app0}.elf PUBLIC ${_ec_flgs}) target_link_libraries(${_app0}.elf -Wl,-Map=${_app0}.map -Wl,--gc-sections -T\"${_linker_script}\" -Wl,--start-group ${_deps} -Wl,--end-group) 
install (TARGETS ${_app0}.elf RUNTIME DESTINATION bin) -if (SOC_FAMILY STREQUAL "zynqmp") - add_definitions(-DCONFIG_TTC0_BASE_ADDR=XPAR_PSU_TTC_0_BASEADDR) - add_definitions(-DCONFIG_IPI_BASE_ADDR=XPAR_PSU_IPI_1_S_AXI_BASEADDR) - add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=65 -DCONFIG_IPI_MASK=0x1000000) - add_definitions(-DCONFIG_TTC_DEV_NAME="ff110000.ttc") - add_definitions(-DCONFIG_IPI_DEV_NAME="ff310000.ipi") -elseif (SOC_FAMILY STREQUAL "versal") - add_definitions(-DCONFIG_TTC0_BASE_ADDR=0xFF0E0000) - add_definitions(-DCONFIG_IPI_BASE_ADDR=0xFF340000) - add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=63 -DCONFIG_IPI_MASK=0x0000020) - add_definitions(-DCONFIG_TTC_DEV_NAME="ff0e0000.ttc") - add_definitions(-DCONFIG_IPI_DEV_NAME="ff340000.ipi") -endif (SOC_FAMILY STREQUAL "zynqmp") diff --git a/examples/system/generic/xlnx_r5/amp_demo/common.h b/examples/system/generic/xlnx_r5/amp_demo/common.h index e1b33b7a..3c92e587 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/common.h +++ b/examples/system/generic/xlnx_r5/amp_demo/common.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. - * Copyright (C) 2022, Advanced Micro Devices, Inc. + * Copyright (c) 2017 - 2022, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -24,11 +24,46 @@ #include #include "sys_init.h" +#if defined(PLATFORM_ZYNQMP) + +#define TTC0_BASE_ADDR 0xff110000 +#define TTC_DEV_NAME "ff110000.ttc" +#define IPI_MASK 0x1000000 + +#if XPAR_CPU_ID == 0 +#define IPI_DEV_NAME "ff310000.ipi" +#define IPI_BASE_ADDR 0xff310000 +#define IPI_IRQ_VECT_ID 65 +#else +#define IPI_DEV_NAME "ff320000.ipi" +#define IPI_BASE_ADDR 0xff320000 +#define IPI_IRQ_VECT_ID 66 +#endif + +#elif defined(versal) +#define TTC0_BASE_ADDR 0xFF0E0000 +#define IPI_BASE_ADDR 0xFF340000 +#define IPI_IRQ_VECT_ID 63 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "ff0e0000.ttc" +#define IPI_DEV_NAME "ff340000.ipi" + +#elif defined(VERSAL_NET) + +#define TTC0_BASE_ADDR 0xFD1C0000 +#define IPI_BASE_ADDR 0xEB340000 +#define IPI_IRQ_VECT_ID 90 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "fd1c0000.ttc" +#define IPI_DEV_NAME "eb340000.ipi" +#endif + /* Devices names */ #define BUS_NAME "generic" -#define IPI_DEV_NAME CONFIG_IPI_DEV_NAME -#define SHM_DEV_NAME "3ed80000.shm" -#define TTC_DEV_NAME CONFIG_TTC_DEV_NAME +#define SHM_DEV_NAME "3ed80000.shm" + +#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID +#define SHM_BASE_ADDR 0x3ED80000 /* IPI registers offset */ #define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ @@ -38,9 +73,6 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK CONFIG_IPI_MASK /* IPI mask for kick from APU. - We use PL0 IPI in this demo. 
*/ - /* TTC counter offsets */ #define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ #define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ @@ -172,4 +204,4 @@ static inline void print_demo(char *name) } #endif /* __COMMON_H__ */ - + diff --git a/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld new file mode 100644 index 00000000..f59093c0 --- /dev/null +++ b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld @@ -0,0 +1,294 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? _ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? 
_UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psu_r5_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psu_r5_btcm_MEM_0 : ORIGIN = 0x20000, LENGTH = 0x10000 + psu_r5_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psu_r5_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psu_r5_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psu_r5_btcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psu_r5_btcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psu_r5_btcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psu_r5_btcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psu_r5_btcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psu_r5_btcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psu_r5_btcm_MEM_0 + +.data1 : { + __data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psu_r5_btcm_MEM_0 + +.got : { + *(.got) +} > psu_r5_btcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + 
+.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psu_r5_btcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psu_r5_btcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psu_r5_btcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psu_r5_btcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psu_r5_btcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psu_r5_btcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psu_r5_btcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + *(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psu_r5_btcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psu_r5_btcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psu_r5_btcm_MEM_0 + +.bss (NOLOAD) : { + . 
= ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psu_r5_btcm_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psu_r5_btcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psu_r5_btcm_MEM_0 + +_end = .; +} diff --git a/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld new file mode 100644 index 00000000..b0bf48dc --- /dev/null +++ b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld @@ -0,0 +1,295 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? 
_ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? _UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psx_r52_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psx_r52_btcm_MEM_0 : ORIGIN = 0x10000, LENGTH = 0x8000 + psx_r52_ctcm_MEM_0 : ORIGIN = 0x18000, LENGTH = 0x8000 + psx_r52_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psx_r52_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psx_r52_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psx_r52_atcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psx_r52_atcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psx_r52_atcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psx_r52_atcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psx_r52_atcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psx_r52_atcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psx_r52_atcm_MEM_0 + +.data1 : { + 
__data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psx_r52_atcm_MEM_0 + +.got : { + *(.got) +} > psx_r52_atcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psx_r52_atcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psx_r52_atcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psx_r52_atcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psx_r52_atcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psx_r52_atcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psx_r52_atcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psx_r52_atcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + 
*(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psx_r52_atcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psx_r52_atcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psx_r52_atcm_MEM_0 + +.bss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psx_r52_atcm_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psx_r52_atcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psx_r52_atcm_MEM_0 + +_end = .; +} diff --git a/examples/system/generic/xlnx_r5/amp_demo/sys_init.c b/examples/system/generic/xlnx_r5/amp_demo/sys_init.c index 57ac3316..043a7cc2 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/generic/xlnx_r5/amp_demo/sys_init.c @@ -1,6 +1,7 @@ /****************************************************************************** * - * Copyright (C) 2017 Xilinx, Inc. All rights reserved. + * Copyright (C) 2017 - 2022, Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. 
* * SPDX-License-Identifier: BSD-3-Clause * @@ -27,14 +28,6 @@ #define UART_BAUD 9600 #endif -#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID - -#define IPI_IRQ_VECT_ID CONFIG_IPI_IRQ_VECT_ID - -#define SHM_BASE_ADDR 0x3ED80000 -#define TTC0_BASE_ADDR CONFIG_TTC0_BASE_ADDR -#define IPI_BASE_ADDR CONFIG_IPI_BASE_ADDR - /* Default generic I/O region page shift */ /* Each I/O region can contain multiple pages. * In baremetal system, the memory mapping is flat, there is no From 68f4ba0bfa24438c5349cd08ae43f970910f1d51 Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Mon, 29 Jan 2024 02:56:14 -1200 Subject: [PATCH 16/24] lib: generic: xlnx: Use CMAKE_C_FLAGS to determine if SDT symbol is present Currently the bspconfig.h file is used to determine if the System Device Tree workflow is being used. Instead, change the condition to check whether SDT is in the CMAKE_C_FLAGS variable. Signed-off-by: Ben Levinsky Acked-by: Sergei Korneichuk --- lib/system/generic/xlnx/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/system/generic/xlnx/CMakeLists.txt b/lib/system/generic/xlnx/CMakeLists.txt index af02c7e1..8fe293f0 100644 --- a/lib/system/generic/xlnx/CMakeLists.txt +++ b/lib/system/generic/xlnx/CMakeLists.txt @@ -8,8 +8,6 @@ collect (PROJECT_LIB_HEADERS sys.h) collect (PROJECT_LIB_SOURCES irq.c) -include(CheckSymbolExists) -check_symbol_exists(SDT "bspconfig.h" HAS_SYSTEM_DT) -if (HAS_SYSTEM_DT) +if (CMAKE_C_FLAGS MATCHES "SDT") collect (PROJECT_LIB_HEADERS sys_devicetree.h) -endif() +endif (CMAKE_C_FLAGS MATCHES "SDT") From 5805b7fda9dd3b79e6f370e2c3b8ecb5674cf64f Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Tue, 30 Jan 2024 04:48:05 -1200 Subject: [PATCH 17/24] examples: linux: zynqmp: Add Versal-NET IPI Values Ensure that Versal-NET IPI values are present for Libmetal AMP Demo on Linux platform.
As part of this, also copy convention in zynqmp_r5 area and move values to common.h Signed-off-by: Ben Levinsky Acked-by: Sergei Korneichuk --- .../zynqmp/zynqmp_amp_demo/CMakeLists.txt | 11 -------- .../linux/zynqmp/zynqmp_amp_demo/common.h | 26 +++++++++++++++---- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt index da3bd3df..d2b7c160 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt @@ -69,14 +69,3 @@ foreach (_app libmetal_amp_mb_shmem) endif (${PROJECT_SYSTEM} STREQUAL "linux") endif (WITH_STATIC_LIB) endforeach (_app) - -if (SOC_FAMILY STREQUAL "zynqmp") - add_definitions(-DCONFIG_IPI_DEV_NAME="ff340000.ipi") - add_definitions(-DCONFIG_TTC_DEV_NAME="ff110000.timer") - add_definitions(-DCONFIG_IPI_MASK=0x100) -elseif (SOC_FAMILY STREQUAL "versal") - add_definitions(-DCONFIG_IPI_DEV_NAME="ff360000.ipi") - add_definitions(-DCONFIG_TTC_DEV_NAME="ff0e0000.ttc0") - add_definitions(-DCONFIG_IPI_MASK=0x08) -endif (SOC_FAMILY STREQUAL "zynqmp") - diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h index bfe91cc8..e73b1288 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. - * Copyright (C) 2022, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -15,9 +15,27 @@ #include #define BUS_NAME "platform" -#define IPI_DEV_NAME CONFIG_IPI_DEV_NAME #define SHM_DEV_NAME "3ed80000.shm" -#define TTC_DEV_NAME CONFIG_TTC_DEV_NAME + +#if defined(PLATFORM_ZYNQMP) + +#define IPI_DEV_NAME "ff340000.ipi" +#define IPI_MASK 0x20 +#define TTC_DEV_NAME "ff110000.timer" + +#elif defined(versal) + +#define IPI_DEV_NAME "ff3600000.ipi" +#define IPI_MASK 0x08 +#define TTC_DEV_NAME "ff0e0000.ttc0" + +#elif defined(VERSAL_NET) + +#define IPI_DEV_NAME "eb3600000.ipi" +#define IPI_MASK 0x08 +#define TTC_DEV_NAME "fd1c0000.ttc0" + +#endif /* Apply this snippet to the device tree in an overlay so that * Linux userspace can see and use TTC0: @@ -36,8 +54,6 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK CONFIG_IPI_MASK /* IPI mask for kick from RPU. */ - /* TTC counter offsets */ #define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ #define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ From 22e8f5fde5bb9a17653e638f69ba93d07ecb2788 Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Mon, 19 Feb 2024 10:16:50 -1200 Subject: [PATCH 18/24] examples: linux: zynqmp: Fix Typo in ZynqMP IPI Base address The Versal IPI base address is ff360000, not ff3600000. Remove the extra zero.
Signed-off-by: Ben Levinsky Acked-by: Tanmay Shah --- examples/system/linux/zynqmp/zynqmp_amp_demo/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h index e73b1288..9bc6b109 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -25,7 +25,7 @@ #elif defined(versal) -#define IPI_DEV_NAME "ff3600000.ipi" +#define IPI_DEV_NAME "ff360000.ipi" #define IPI_MASK 0x08 #define TTC_DEV_NAME "ff0e0000.ttc0" From d381e628c2f2cf37e13faa65db33a5d4702acf60 Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Mon, 26 Feb 2024 12:02:49 -1200 Subject: [PATCH 19/24] examples: linux: zynqmp: Enable Values to be mutable Enable application values for Linux platform to be overwritten via CMake Configure step. Also ZynqMP Bitmask is incorrect. Make sure it properly kicks RPU0. Signed-off-by: Ben Levinsky Acked-by: Tanmay Shah --- .../linux/zynqmp/zynqmp_amp_demo/common.h | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h index 9bc6b109..936ea37b 100644 --- a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -15,25 +15,52 @@ #include #define BUS_NAME "platform" + +#ifndef SHM_DEV_NAME #define SHM_DEV_NAME "3ed80000.shm" +#endif /* !SHM_DEV_NAME */ #if defined(PLATFORM_ZYNQMP) +#ifndef IPI_DEV_NAME #define IPI_DEV_NAME "ff340000.ipi" -#define IPI_MASK 0x20 +#endif /* !IPI_DEV_NAME */ + +#ifndef TTC_DEV_NAME #define TTC_DEV_NAME "ff110000.timer" +#endif /* !TTC_DEV_NAME */ + +#ifndef IPI_MASK +#define IPI_MASK 0x100 +#endif /* !IPI_MASK */ #elif defined(versal) +#ifndef IPI_DEV_NAME #define IPI_DEV_NAME "ff360000.ipi" +#endif /* !IPI_DEV_NAME */ + +#ifndef IPI_MASK #define 
IPI_MASK 0x08 +#endif /* !IPI_MASK */ + +#ifndef TTC_DEV_NAME #define TTC_DEV_NAME "ff0e0000.ttc0" +#endif /* TTC_DEV_NAME */ #elif defined(VERSAL_NET) +#ifndef IPI_DEV_NAME #define IPI_DEV_NAME "eb3600000.ipi" +#endif /* !IPI_DEV_NAME */ + +#ifndef IPI_MASK #define IPI_MASK 0x08 +#endif /* !IPI_MASK */ + +#ifndef TTC_DEV_NAME #define TTC_DEV_NAME "fd1c0000.ttc0" +#endif /* !TTC_DEV_NAME */ #endif From 3c27d8539bd07a9a2713483129b90c59b418ca6d Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Tue, 27 Feb 2024 05:25:15 -1200 Subject: [PATCH 20/24] examples: zynqmp_r5: CMake: Enable demo to build without libxil present in SDT flow As in SDT Workflow for Rigel there can be template application in BSP for Libmetal, let Rigel manage the dependency for template application in that case. Signed-off-by: Ben Levinsky Acked-by: Tanmay Shah --- examples/system/freertos/xlnx_r5/CMakeLists.txt | 12 +++++++----- examples/system/generic/xlnx_r5/CMakeLists.txt | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/examples/system/freertos/xlnx_r5/CMakeLists.txt b/examples/system/freertos/xlnx_r5/CMakeLists.txt index cf1008b5..72438afb 100644 --- a/examples/system/freertos/xlnx_r5/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/CMakeLists.txt @@ -7,11 +7,13 @@ collect(PROJECT_LIB_DEPS m) set (_lib "xil") find_library (_lib_path ${_lib}) -if (NOT _lib_path) - message ( "external library ${_lib_path} not found" ) - message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) - message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) -endif (NOT _lib_path) +if (NOT CMAKE_C_FLAGS MATCHES "SDT") + if (NOT _lib_path) + message ( "external library ${_lib_path} not found" ) + message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) + message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) + endif (NOT _lib_path) +endif (NOT CMAKE_C_FLAGS MATCHES "SDT") get_filename_component (_lib_path ${_lib_path} DIRECTORY) 
collect (PROJECT_LIB_DIRS ${_lib_path}) diff --git a/examples/system/generic/xlnx_r5/CMakeLists.txt b/examples/system/generic/xlnx_r5/CMakeLists.txt index eea72c2a..eddae6c3 100644 --- a/examples/system/generic/xlnx_r5/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/CMakeLists.txt @@ -10,11 +10,13 @@ list (APPEND _lib "xilmem") list (APPEND _lib "xilstandalone") endif (XILINX_PRE_V2019) find_library (_lib_path ${_lib}) -if (NOT _lib_path) - message ( "external library ${_lib_path} not found" ) - message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) - message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) -endif (NOT _lib_path) +if (NOT CMAKE_C_FLAGS MATCHES "SDT") + if (NOT _lib_path) + message ( "external library ${_lib_path} not found" ) + message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) + message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) + endif (NOT _lib_path) +endif (NOT CMAKE_C_FLAGS MATCHES "SDT") get_filename_component (_lib_path ${_lib_path} DIRECTORY) collect (PROJECT_LIB_DIRS ${_lib_path}) From 8eca38ff52318310020472429e381cc636b429d4 Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Wed, 14 Aug 2024 02:18:22 -1200 Subject: [PATCH 21/24] examples: zynqmp_r5: CMake: Add SDT-required libs as link dependencies Push required linking libs to CMake so that surrounding tooling does not have to manage pushing these in. 
Signed-off-by: Ben Levinsky Reviewed-by: Tanmay Shah --- examples/system/freertos/xlnx_r5/CMakeLists.txt | 8 ++++++++ examples/system/generic/xlnx_r5/CMakeLists.txt | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/examples/system/freertos/xlnx_r5/CMakeLists.txt b/examples/system/freertos/xlnx_r5/CMakeLists.txt index 72438afb..719c9b74 100644 --- a/examples/system/freertos/xlnx_r5/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/CMakeLists.txt @@ -5,6 +5,14 @@ collect(PROJECT_LIB_DEPS xil) collect(PROJECT_LIB_DEPS c) collect(PROJECT_LIB_DEPS m) +get_property (HAS_SYSTEM_DT GLOBAL PROPERTY HAS_SYSTEM_DT) +if (HAS_SYSTEM_DT) + # SDT flow has broken up libxil into multiple libs. Below libs are required + # for compilation. + collect(PROJECT_LIB_DEPS xilstandalone) + collect(PROJECT_LIB_DEPS xiltimer) +endif(HAS_SYSTEM_DT) + set (_lib "xil") find_library (_lib_path ${_lib}) if (NOT CMAKE_C_FLAGS MATCHES "SDT") diff --git a/examples/system/generic/xlnx_r5/CMakeLists.txt b/examples/system/generic/xlnx_r5/CMakeLists.txt index eddae6c3..b7b78064 100644 --- a/examples/system/generic/xlnx_r5/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/CMakeLists.txt @@ -4,6 +4,14 @@ collect(PROJECT_LIB_DEPS xil) collect(PROJECT_LIB_DEPS c) collect(PROJECT_LIB_DEPS m) +get_property (HAS_SYSTEM_DT GLOBAL PROPERTY HAS_SYSTEM_DT) +if (HAS_SYSTEM_DT) + # SDT flow has broken up libxil into multiple libs. Below libs are required + # for compilation. 
+ collect(PROJECT_LIB_DEPS xilstandalone) + collect(PROJECT_LIB_DEPS xiltimer) +endif(HAS_SYSTEM_DT) + set (_lib "xil") if (XILINX_PRE_V2019) list (APPEND _lib "xilmem") From 7ae1fe931f349f575fd70e11bcc07002d54def5e Mon Sep 17 00:00:00 2001 From: "Levinsky, Ben" Date: Thu, 19 Sep 2024 03:30:30 -1200 Subject: [PATCH 22/24] examples: freertos: zynqmp_r5: zynqmp_amp_demo: Add symbols for System Device Tree flow Port changes in 38d29393d828e4ccdb3c621a704bab0352b45b93 to FreeRTOS demo Signed-off-by: Ben Levinsky Acked-by: Rajiv Mohan --- .../freertos/xlnx_r5/amp_demo/CMakeLists.txt | 29 +- .../system/freertos/xlnx_r5/amp_demo/common.h | 55 +++- .../freertos/xlnx_r5/amp_demo/lscript.ld | 66 ++-- .../freertos/xlnx_r5/amp_demo/lscript_sdt.ld | 294 +++++++++++++++++ .../amp_demo/lscript_sdt_versal_net.ld | 295 ++++++++++++++++++ .../freertos/xlnx_r5/amp_demo/sys_init.c | 10 +- .../freertos/xlnx_r5/amp_demo/sys_init.h | 3 +- 7 files changed, 684 insertions(+), 68 deletions(-) create mode 100644 examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld create mode 100644 examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld diff --git a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt index c377bfd1..ae3eb260 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt @@ -19,23 +19,24 @@ list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demod.c) +include(CheckSymbolExists) +check_symbol_exists(SDT "bspconfig.h" HAS_SYSTEM_DT) +check_symbol_exists(VERSAL_NET "bspconfig.h" IS_VERSAL_NET) +if (HAS_SYSTEM_DT) + if (IS_VERSAL_NET) + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt_versal_net.ld) + else() + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt.ld) +
endif(IS_VERSAL_NET) +endif(HAS_SYSTEM_DT) + +if (DEFINED DEMO_LINK_FLAGS) + set (_deps "${_deps} ${DEMO_LINK_FLAGS}") +endif() + get_property (_linker_options GLOBAL PROPERTY TEST_LINKER_OPTIONS) add_executable (${_app0}.elf ${_src0}) get_property (_ec_flgs GLOBAL PROPERTY "PROJECT_EC_FLAGS") target_compile_options (${_app0}.elf PUBLIC ${_ec_flgs}) target_link_libraries(${_app0}.elf -Wl,-Map=${_app0}.map -Wl,--gc-sections -T\"${_linker_script}\" -Wl,--start-group ${_deps} -Wl,--end-group) install (TARGETS ${_app0}.elf RUNTIME DESTINATION bin) - -if (SOC_FAMILY STREQUAL "zynqmp") - add_definitions(-DCONFIG_TTC0_BASE_ADDR=XPAR_PSU_TTC_0_BASEADDR) - add_definitions(-DCONFIG_IPI_BASE_ADDR=XPAR_PSU_IPI_1_S_AXI_BASEADDR) - add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=65 -DCONFIG_IPI_MASK=0x1000000) - add_definitions(-DCONFIG_TTC_DEV_NAME="ff110000.ttc") - add_definitions(-DCONFIG_IPI_DEV_NAME="ff310000.ipi") -elseif (SOC_FAMILY STREQUAL "versal") - add_definitions(-DCONFIG_TTC0_BASE_ADDR=0xFF0E0000) - add_definitions(-DCONFIG_IPI_BASE_ADDR=0xFF340000) - add_definitions(-DCONFIG_IPI_IRQ_VECT_ID=63 -DCONFIG_IPI_MASK=0x0000020) - add_definitions(-DCONFIG_TTC_DEV_NAME="ff0e0000.ttc") - add_definitions(-DCONFIG_IPI_DEV_NAME="ff340000.ipi") -endif (SOC_FAMILY STREQUAL "zynqmp") diff --git a/examples/system/freertos/xlnx_r5/amp_demo/common.h b/examples/system/freertos/xlnx_r5/amp_demo/common.h index 1f921947..a4810638 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/common.h +++ b/examples/system/freertos/xlnx_r5/amp_demo/common.h @@ -1,8 +1,9 @@ /* - * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ + * Copyright (c) 2017 - 2022, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ #ifndef __COMMON_H__ #define __COMMON_H__ @@ -22,11 +23,46 @@ #include #include "sys_init.h" +#if defined(PLATFORM_ZYNQMP) + +#define TTC0_BASE_ADDR 0xff110000 +#define TTC_DEV_NAME "ff110000.ttc" +#define IPI_MASK 0x1000000 + +#if XPAR_CPU_ID == 0 +#define IPI_DEV_NAME "ff310000.ipi" +#define IPI_BASE_ADDR 0xff310000 +#define IPI_IRQ_VECT_ID 65 +#else +#define IPI_DEV_NAME "ff320000.ipi" +#define IPI_BASE_ADDR 0xff320000 +#define IPI_IRQ_VECT_ID 66 +#endif + +#elif defined(versal) +#define TTC0_BASE_ADDR 0xFF0E0000 +#define IPI_BASE_ADDR 0xFF340000 +#define IPI_IRQ_VECT_ID 63 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "ff0e0000.ttc" +#define IPI_DEV_NAME "ff340000.ipi" + +#elif defined(VERSAL_NET) + +#define TTC0_BASE_ADDR 0xFD1C0000 +#define IPI_BASE_ADDR 0xEB340000 +#define IPI_IRQ_VECT_ID 90 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "fd1c0000.ttc" +#define IPI_DEV_NAME "eb340000.ipi" +#endif + /* Devices names */ #define BUS_NAME "generic" -#define IPI_DEV_NAME CONFIG_IPI_DEV_NAME -#define SHM_DEV_NAME "3ed80000.shm" -#define TTC_DEV_NAME CONFIG_TTC_DEV_NAME +#define SHM_DEV_NAME "3ed80000.shm" + +#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID +#define SHM_BASE_ADDR 0x3ED80000 /* IPI registers offset */ #define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ @@ -36,9 +72,6 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK CONFIG_IPI_MASK /* IPI mask for kick from APU. - We use PL0 IPI in this demo. 
*/ - /* TTC counter offsets */ #define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ #define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ @@ -160,4 +193,4 @@ static inline void print_demo(char *name) } #endif /* __COMMON_H__ */ - + diff --git a/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld b/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld index 8d2830d5..b8dc60be 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld +++ b/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld @@ -30,8 +30,8 @@ * ******************************************************************************/ -_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x1000; -_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x6000; +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; _ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? _ABORT_STACK_SIZE : 1024; _SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? 
_SUPERVISOR_STACK_SIZE : 2048; @@ -73,23 +73,23 @@ SECTIONS *(.vfp11_veneer) *(.ARM.extab) *(.gnu.linkonce.armextab.*) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_atcm_MEM_0 .init : { KEEP (*(.init)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .fini : { KEEP (*(.fini)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .interp : { KEEP (*(.interp)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .note-ABI-tag : { KEEP (*(.note-ABI-tag)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .rodata : { __rodata_start = .; @@ -97,14 +97,14 @@ SECTIONS *(.rodata.*) *(.gnu.linkonce.r.*) __rodata_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .rodata1 : { __rodata1_start = .; *(.rodata1) *(.rodata1.*) __rodata1_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sdata2 : { __sdata2_start = .; @@ -112,7 +112,7 @@ SECTIONS *(.sdata2.*) *(.gnu.linkonce.s2.*) __sdata2_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sbss2 : { __sbss2_start = .; @@ -120,7 +120,7 @@ SECTIONS *(.sbss2.*) *(.gnu.linkonce.sb2.*) __sbss2_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .data : { __data_start = .; @@ -131,18 +131,18 @@ SECTIONS *(.got) *(.got.plt) __data_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .data1 : { __data1_start = .; *(.data1) *(.data1.*) __data1_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .got : { *(.got) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .ctors : { __CTOR_LIST__ = .; @@ -153,7 +153,7 @@ SECTIONS KEEP (*(.ctors)) __CTOR_END__ = .; ___CTORS_END___ = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .dtors : { __DTOR_LIST__ = .; @@ -164,67 +164,67 @@ SECTIONS KEEP (*(.dtors)) __DTOR_END__ = .; ___DTORS_END___ = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .fixup : { __fixup_start = .; *(.fixup) __fixup_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .eh_frame : { *(.eh_frame) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .eh_framehdr : { __eh_framehdr_start = .; *(.eh_framehdr) __eh_framehdr_end = .; -} > 
psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .gcc_except_table : { *(.gcc_except_table) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .mmu_tbl (ALIGN(16384)) : { __mmu_tbl_start = .; *(.mmu_tbl) __mmu_tbl_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .ARM.exidx : { __exidx_start = .; *(.ARM.exidx*) *(.gnu.linkonce.armexidix.*.*) __exidx_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .preinit_array : { __preinit_array_start = .; KEEP (*(SORT(.preinit_array.*))) KEEP (*(.preinit_array)) __preinit_array_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .init_array : { __init_array_start = .; KEEP (*(SORT(.init_array.*))) KEEP (*(.init_array)) __init_array_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .fini_array : { __fini_array_start = .; KEEP (*(SORT(.fini_array.*))) KEEP (*(.fini_array)) __fini_array_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .ARM.attributes : { __ARM.attributes_start = .; *(.ARM.attributes) __ARM.attributes_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sdata : { __sdata_start = .; @@ -232,7 +232,7 @@ SECTIONS *(.sdata.*) *(.gnu.linkonce.s.*) __sdata_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sbss (NOLOAD) : { __sbss_start = .; @@ -240,7 +240,7 @@ SECTIONS *(.sbss.*) *(.gnu.linkonce.sb.*) __sbss_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .tdata : { __tdata_start = .; @@ -248,7 +248,7 @@ SECTIONS *(.tdata.*) *(.gnu.linkonce.td.*) __tdata_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .tbss : { __tbss_start = .; @@ -256,7 +256,7 @@ SECTIONS *(.tbss.*) *(.gnu.linkonce.tb.*) __tbss_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .bss (NOLOAD) : { . = ALIGN(4); @@ -267,7 +267,7 @@ SECTIONS *(COMMON) . = ALIGN(4); __bss_end__ = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 _SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); @@ -283,7 +283,7 @@ _SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); . 
+= _HEAP_SIZE; _heap_end = .; HeapLimit = .; -} > psu_r5_atcm_MEM_0 +} > psu_r5_btcm_MEM_0 .stack (NOLOAD) : { . = ALIGN(16); @@ -311,7 +311,7 @@ _SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); . += _UNDEF_STACK_SIZE; . = ALIGN(16); __undef_stack = .; -} > psu_r5_atcm_MEM_0 +} > psu_r5_btcm_MEM_0 _end = .; } diff --git a/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld new file mode 100644 index 00000000..da885b1b --- /dev/null +++ b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld @@ -0,0 +1,294 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? _ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? 
_UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psu_r5_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psu_r5_btcm_MEM_0 : ORIGIN = 0x20000, LENGTH = 0x10000 + psu_r5_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psu_r5_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psu_r5_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psu_r5_btcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psu_r5_btcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psu_r5_btcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psu_r5_btcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psu_r5_btcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psu_r5_btcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psu_r5_btcm_MEM_0 + +.data1 : { + __data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psu_r5_btcm_MEM_0 + +.got : { + *(.got) +} > psu_r5_btcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + 
+.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psu_r5_btcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psu_r5_btcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psu_r5_btcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psu_r5_btcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psu_r5_btcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psu_r5_btcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psu_r5_btcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + *(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psu_r5_btcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psu_r5_btcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psu_r5_btcm_MEM_0 + +.bss (NOLOAD) : { + . 
= ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psu_r5_ddr_0_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psu_r5_btcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psu_r5_btcm_MEM_0 + +_end = .; +} diff --git a/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld new file mode 100644 index 00000000..a570286b --- /dev/null +++ b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld @@ -0,0 +1,295 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? 
_ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? _UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psx_r52_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psx_r52_btcm_MEM_0 : ORIGIN = 0x10000, LENGTH = 0x8000 + psx_r52_ctcm_MEM_0 : ORIGIN = 0x18000, LENGTH = 0x8000 + psx_r52_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psx_r52_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psx_r52_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psx_r52_atcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psx_r52_atcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psx_r52_atcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psx_r52_atcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psx_r52_atcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psx_r52_atcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psx_r52_atcm_MEM_0 + +.data1 : { + 
__data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psx_r52_atcm_MEM_0 + +.got : { + *(.got) +} > psx_r52_atcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psx_r52_atcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psx_r52_atcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psx_r52_atcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psx_r52_atcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psx_r52_atcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psx_r52_atcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psx_r52_atcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + 
*(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psx_r52_atcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psx_r52_atcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psx_r52_atcm_MEM_0 + +.bss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psx_r52_ddr_0_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psx_r52_atcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psx_r52_atcm_MEM_0 + +_end = .; +} diff --git a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c index a7c4b572..fb2e375c 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c @@ -1,6 +1,6 @@ /****************************************************************************** * - * Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2022-2024, Advanced Micro Devices, Inc. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause * @@ -27,14 +27,6 @@ #define UART_BAUD 9600 #endif -#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID - -#define IPI_IRQ_VECT_ID CONFIG_IPI_IRQ_VECT_ID - -#define SHM_BASE_ADDR 0x3ED80000 -#define TTC0_BASE_ADDR CONFIG_TTC0_BASE_ADDR -#define IPI_BASE_ADDR CONFIG_IPI_BASE_ADDR - /* Default generic I/O region page shift */ /* Each I/O region can contain multiple pages. * In FreeRTOS system, the memory mapping is flat, there is no diff --git a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h index 38487b66..4fe32873 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h +++ b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h @@ -1,6 +1,7 @@ /****************************************************************************** * - * Copyright (C) 2008 - 2014 Xilinx, Inc. All rights reserved. + * Copyright (C) 2017-2022 Xilinx, Inc. All rights reserved. + * Copyright (c) 2022-2024, Advanced Micro Devices, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause * From 19ece347c4f11a6fdb12181ff7cae7d8db3f3393 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 13 Nov 2024 08:17:52 -0800 Subject: [PATCH 23/24] examples: zynqmp_r5: Enable CMake to pass in app binary name Enable CMake tooling to designate output ELF name for FreeRTOS and Standalone targets. 
Signed-off-by: Ben Levinsky Reviewed-by: Tanmay Shah Signed-off-by: Ben Levinsky --- examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt | 8 +++++++- examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt index ae3eb260..3073e4c4 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt @@ -10,7 +10,13 @@ set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript.ld) set (_src_common ${CMAKE_CURRENT_SOURCE_DIR}/init_${PROJECT_SYSTEM}.c) set (_app0 libmetal_amp_demod) -set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/${_app0}.c) + +if (DEFINED CMAKE_PROJECT_NAME) + set (_app0 ${CMAKE_PROJECT_NAME}) +endif(DEFINED CMAKE_PROJECT_NAME) + +set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/libmetal_amp_demod.c) + list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/sys_init.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demod.c) diff --git a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt index fcccea65..f463bf87 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt @@ -10,7 +10,13 @@ set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript.ld) set (_src_common ${CMAKE_CURRENT_SOURCE_DIR}/init_${PROJECT_SYSTEM}.c) set (_app0 libmetal_amp_demod) -set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/${_app0}.c) + +if (DEFINED CMAKE_PROJECT_NAME) + set (_app0 ${CMAKE_PROJECT_NAME}) +endif(DEFINED CMAKE_PROJECT_NAME) + +set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/libmetal_amp_demod.c) + list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/sys_init.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demod.c) list(APPEND _src0 
${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demod.c) From 9dbceb6ff9d6fbb3645443c26d5ca52b8913daee Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Thu, 15 Aug 2024 06:33:36 -0700 Subject: [PATCH 24/24] cmake: platforms: xlnx: Update machine from 'zynqmp_' to 'xlnx_' Match update so that the cmake platform files are coupled to vendor and not SOC which is previously 'zynqmp_'. Signed-off-by: Ben Levinsky --- cmake/platforms/xlnx-a53-freertos.cmake | 8 ++++++++ ...zynqmp-a53-generic.cmake => xlnx-a53-generic.cmake} | 2 +- .../platforms/{zynqmp-linux.cmake => xlnx-linux.cmake} | 0 cmake/platforms/xlnx-r5-freertos.cmake | 8 ++++++++ cmake/platforms/xlnx-r5-generic.cmake | 10 ++++++++++ cmake/platforms/zynqmp-a53-freertos.cmake | 8 -------- cmake/platforms/zynqmp-r5-freertos.cmake | 8 -------- cmake/platforms/zynqmp-r5-generic.cmake | 10 ---------- 8 files changed, 27 insertions(+), 27 deletions(-) create mode 100644 cmake/platforms/xlnx-a53-freertos.cmake rename cmake/platforms/{zynqmp-a53-generic.cmake => xlnx-a53-generic.cmake} (81%) rename cmake/platforms/{zynqmp-linux.cmake => xlnx-linux.cmake} (100%) create mode 100644 cmake/platforms/xlnx-r5-freertos.cmake create mode 100644 cmake/platforms/xlnx-r5-generic.cmake delete mode 100644 cmake/platforms/zynqmp-a53-freertos.cmake delete mode 100644 cmake/platforms/zynqmp-r5-freertos.cmake delete mode 100644 cmake/platforms/zynqmp-r5-generic.cmake diff --git a/cmake/platforms/xlnx-a53-freertos.cmake b/cmake/platforms/xlnx-a53-freertos.cmake new file mode 100644 index 00000000..60b8e63e --- /dev/null +++ b/cmake/platforms/xlnx-a53-freertos.cmake @@ -0,0 +1,8 @@ +set (CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "") +set (MACHINE "xlnx_a53" CACHE STRING "") +set (PROJECT_VENDOR "xlnx" CACHE STRING "") +set (CROSS_PREFIX "aarch64-none-elf-" CACHE STRING "") +set (CMAKE_C_FLAGS "" CACHE STRING "") + +include (cross-freertos-gcc) + diff --git a/cmake/platforms/zynqmp-a53-generic.cmake b/cmake/platforms/xlnx-a53-generic.cmake 
similarity index 81% rename from cmake/platforms/zynqmp-a53-generic.cmake rename to cmake/platforms/xlnx-a53-generic.cmake index 02309609..91d11959 100644 --- a/cmake/platforms/zynqmp-a53-generic.cmake +++ b/cmake/platforms/xlnx-a53-generic.cmake @@ -1,5 +1,5 @@ set (CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "") -set (MACHINE "zynqmp_a53" CACHE STRING "") +set (MACHINE "xlnx_a53" CACHE STRING "") set (PROJECT_VENDOR "xlnx" CACHE STRING "") set (CROSS_PREFIX "aarch64-none-elf-" CACHE STRING "") set (CMAKE_C_FLAGS "" CACHE STRING "") diff --git a/cmake/platforms/zynqmp-linux.cmake b/cmake/platforms/xlnx-linux.cmake similarity index 100% rename from cmake/platforms/zynqmp-linux.cmake rename to cmake/platforms/xlnx-linux.cmake diff --git a/cmake/platforms/xlnx-r5-freertos.cmake b/cmake/platforms/xlnx-r5-freertos.cmake new file mode 100644 index 00000000..164ec046 --- /dev/null +++ b/cmake/platforms/xlnx-r5-freertos.cmake @@ -0,0 +1,8 @@ +set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") +set (MACHINE "xlnx_r5" CACHE STRING "") +set (PROJECT_VENDOR "xlnx" CACHE STRING "") +set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") +set (CMAKE_C_FLAGS "-mfloat-abi=soft -mcpu=cortex-r5" CACHE STRING "") + +include (cross-freertos-gcc) + diff --git a/cmake/platforms/xlnx-r5-generic.cmake b/cmake/platforms/xlnx-r5-generic.cmake new file mode 100644 index 00000000..84f811e9 --- /dev/null +++ b/cmake/platforms/xlnx-r5-generic.cmake @@ -0,0 +1,10 @@ +set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") +set (MACHINE "xlnx_r5" CACHE STRING "") +set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") +set (PROJECT_VENDOR "xlnx" CACHE STRING "") + +# Xilinx SDK version earlier than 2017.2 use mfloat-abi=soft by default to generate libxil +set (CMAKE_C_FLAGS "-mfloat-abi=hard -mfpu=vfpv3-d16 -mcpu=cortex-r5" CACHE STRING "") + +include (cross-generic-gcc) + diff --git a/cmake/platforms/zynqmp-a53-freertos.cmake b/cmake/platforms/zynqmp-a53-freertos.cmake deleted file mode 100644 index 
b911b17d..00000000 --- a/cmake/platforms/zynqmp-a53-freertos.cmake +++ /dev/null @@ -1,8 +0,0 @@ -set (CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "") -set (MACHINE "zynqmp_a53" CACHE STRING "") -set (PROJECT_VENDOR "xlnx" CACHE STRING "") -set (CROSS_PREFIX "aarch64-none-elf-" CACHE STRING "") -set (CMAKE_C_FLAGS "" CACHE STRING "") - -include (cross-freertos-gcc) - diff --git a/cmake/platforms/zynqmp-r5-freertos.cmake b/cmake/platforms/zynqmp-r5-freertos.cmake deleted file mode 100644 index b7e3e652..00000000 --- a/cmake/platforms/zynqmp-r5-freertos.cmake +++ /dev/null @@ -1,8 +0,0 @@ -set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") -set (MACHINE "zynqmp_r5" CACHE STRING "") -set (PROJECT_VENDOR "xlnx" CACHE STRING "") -set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") -set (CMAKE_C_FLAGS "-mfloat-abi=soft -mcpu=cortex-r5" CACHE STRING "") - -include (cross-freertos-gcc) - diff --git a/cmake/platforms/zynqmp-r5-generic.cmake b/cmake/platforms/zynqmp-r5-generic.cmake deleted file mode 100644 index a2264771..00000000 --- a/cmake/platforms/zynqmp-r5-generic.cmake +++ /dev/null @@ -1,10 +0,0 @@ -set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") -set (MACHINE "zynqmp_r5" CACHE STRING "") -set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") -set (PROJECT_VENDOR "xlnx" CACHE STRING "") - -# Xilinx SDK version earlier than 2017.2 use mfloat-abi=soft by default to generate libxil -set (CMAKE_C_FLAGS "-mfloat-abi=hard -mfpu=vfpv3-d16 -mcpu=cortex-r5" CACHE STRING "") - -include (cross-generic-gcc) -