diff --git a/cmake/options.cmake b/cmake/options.cmake index 1c776536..7e615a48 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -69,7 +69,7 @@ if (WITH_ZEPHYR) endif (WITH_ZEPHYR) option (WITH_DEFAULT_LOGGER "Build with default logger" ON) -option (WITH_FUNC_LINE_LOG "Log with function name, line number prefix" OFF) +option (WITH_FUNC_LINE_LOG "Log with function name, line number prefix" ON) option (WITH_DOC "Build with documentation" ON) diff --git a/cmake/platforms/xlnx-a53-freertos.cmake b/cmake/platforms/xlnx-a53-freertos.cmake new file mode 100644 index 00000000..60b8e63e --- /dev/null +++ b/cmake/platforms/xlnx-a53-freertos.cmake @@ -0,0 +1,8 @@ +set (CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "") +set (MACHINE "xlnx_a53" CACHE STRING "") +set (PROJECT_VENDOR "xlnx" CACHE STRING "") +set (CROSS_PREFIX "aarch64-none-elf-" CACHE STRING "") +set (CMAKE_C_FLAGS "" CACHE STRING "") + +include (cross-freertos-gcc) + diff --git a/cmake/platforms/zynqmp-a53-generic.cmake b/cmake/platforms/xlnx-a53-generic.cmake similarity index 81% rename from cmake/platforms/zynqmp-a53-generic.cmake rename to cmake/platforms/xlnx-a53-generic.cmake index 02309609..91d11959 100644 --- a/cmake/platforms/zynqmp-a53-generic.cmake +++ b/cmake/platforms/xlnx-a53-generic.cmake @@ -1,5 +1,5 @@ set (CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "") -set (MACHINE "zynqmp_a53" CACHE STRING "") +set (MACHINE "xlnx_a53" CACHE STRING "") set (PROJECT_VENDOR "xlnx" CACHE STRING "") set (CROSS_PREFIX "aarch64-none-elf-" CACHE STRING "") set (CMAKE_C_FLAGS "" CACHE STRING "") diff --git a/cmake/platforms/zynqmp-linux.cmake b/cmake/platforms/xlnx-linux.cmake similarity index 100% rename from cmake/platforms/zynqmp-linux.cmake rename to cmake/platforms/xlnx-linux.cmake diff --git a/cmake/platforms/xlnx-r5-freertos.cmake b/cmake/platforms/xlnx-r5-freertos.cmake new file mode 100644 index 00000000..164ec046 --- /dev/null +++ b/cmake/platforms/xlnx-r5-freertos.cmake @@ -0,0 +1,8 @@ +set 
(CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") +set (MACHINE "xlnx_r5" CACHE STRING "") +set (PROJECT_VENDOR "xlnx" CACHE STRING "") +set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") +set (CMAKE_C_FLAGS "-mfloat-abi=soft -mcpu=cortex-r5" CACHE STRING "") + +include (cross-freertos-gcc) + diff --git a/cmake/platforms/xlnx-r5-generic.cmake b/cmake/platforms/xlnx-r5-generic.cmake new file mode 100644 index 00000000..84f811e9 --- /dev/null +++ b/cmake/platforms/xlnx-r5-generic.cmake @@ -0,0 +1,10 @@ +set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") +set (MACHINE "xlnx_r5" CACHE STRING "") +set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") +set (PROJECT_VENDOR "xlnx" CACHE STRING "") + +# Xilinx SDK version earlier than 2017.2 use mfloat-abi=soft by default to generate libxil +set (CMAKE_C_FLAGS "-mfloat-abi=hard -mfpu=vfpv3-d16 -mcpu=cortex-r5" CACHE STRING "") + +include (cross-generic-gcc) + diff --git a/cmake/platforms/zynqmp-a53-freertos.cmake b/cmake/platforms/zynqmp-a53-freertos.cmake deleted file mode 100644 index b911b17d..00000000 --- a/cmake/platforms/zynqmp-a53-freertos.cmake +++ /dev/null @@ -1,8 +0,0 @@ -set (CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "") -set (MACHINE "zynqmp_a53" CACHE STRING "") -set (PROJECT_VENDOR "xlnx" CACHE STRING "") -set (CROSS_PREFIX "aarch64-none-elf-" CACHE STRING "") -set (CMAKE_C_FLAGS "" CACHE STRING "") - -include (cross-freertos-gcc) - diff --git a/cmake/platforms/zynqmp-r5-freertos.cmake b/cmake/platforms/zynqmp-r5-freertos.cmake deleted file mode 100644 index b7e3e652..00000000 --- a/cmake/platforms/zynqmp-r5-freertos.cmake +++ /dev/null @@ -1,8 +0,0 @@ -set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") -set (MACHINE "zynqmp_r5" CACHE STRING "") -set (PROJECT_VENDOR "xlnx" CACHE STRING "") -set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") -set (CMAKE_C_FLAGS "-mfloat-abi=soft -mcpu=cortex-r5" CACHE STRING "") - -include (cross-freertos-gcc) - diff --git a/cmake/platforms/zynqmp-r5-generic.cmake 
b/cmake/platforms/zynqmp-r5-generic.cmake deleted file mode 100644 index a2264771..00000000 --- a/cmake/platforms/zynqmp-r5-generic.cmake +++ /dev/null @@ -1,10 +0,0 @@ -set (CMAKE_SYSTEM_PROCESSOR "arm" CACHE STRING "") -set (MACHINE "zynqmp_r5" CACHE STRING "") -set (CROSS_PREFIX "armr5-none-eabi-" CACHE STRING "") -set (PROJECT_VENDOR "xlnx" CACHE STRING "") - -# Xilinx SDK version earlier than 2017.2 use mfloat-abi=soft by default to generate libxil -set (CMAKE_C_FLAGS "-mfloat-abi=hard -mfpu=vfpv3-d16 -mcpu=cortex-r5" CACHE STRING "") - -include (cross-generic-gcc) - diff --git a/examples/system/freertos/xlnx_r5/CMakeLists.txt b/examples/system/freertos/xlnx_r5/CMakeLists.txt index cf1008b5..719c9b74 100644 --- a/examples/system/freertos/xlnx_r5/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/CMakeLists.txt @@ -5,13 +5,23 @@ collect(PROJECT_LIB_DEPS xil) collect(PROJECT_LIB_DEPS c) collect(PROJECT_LIB_DEPS m) +get_property (HAS_SYSTEM_DT GLOBAL PROPERTY HAS_SYSTEM_DT) +if (HAS_SYSTEM_DT) + # SDT flow has broken up libxil into multiple libs. Below libs are required + # for compilation. 
+ collect(PROJECT_LIB_DEPS xilstandalone) + collect(PROJECT_LIB_DEPS xiltimer) +endif(HAS_SYSTEM_DT) + set (_lib "xil") find_library (_lib_path ${_lib}) -if (NOT _lib_path) - message ( "external library ${_lib_path} not found" ) - message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) - message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) -endif (NOT _lib_path) +if (NOT CMAKE_C_FLAGS MATCHES "SDT") + if (NOT _lib_path) + message ( "external library ${_lib_path} not found" ) + message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) + message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) + endif (NOT _lib_path) +endif (NOT CMAKE_C_FLAGS MATCHES "SDT") get_filename_component (_lib_path ${_lib_path} DIRECTORY) collect (PROJECT_LIB_DIRS ${_lib_path}) diff --git a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt index 2389c7ea..3073e4c4 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/freertos/xlnx_r5/amp_demo/CMakeLists.txt @@ -10,7 +10,13 @@ set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript.ld) set (_src_common ${CMAKE_CURRENT_SOURCE_DIR}/init_${PROJECT_SYSTEM}.c) set (_app0 libmetal_amp_demod) -set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/${_app0}.c) + +if (DEFINED CMAKE_PROJECT_NAME) + set (_app0 ${CMAKE_PROJECT_NAME}) +endif(DEFINED CMAKE_PROJECT_NAME) + +set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/libmetal_amp_demod.c) + list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/sys_init.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demod.c) @@ -19,10 +25,24 @@ list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demod.c) +include(CheckSymbolExists) +check_symbol_exists(SDT "bspconfig.h" 
HAS_SYSTEM_DT) +check_symbol_exists(VERSAL_NET "bspconfig.h" IS_VERSAL_NET) +if (HAS_SYSTEM_DT) + if (IS_VERSAL_NET) + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt_versal_net.ld) + else() + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt.ld) + endif(IS_VERSAL_NET) +endif(HAS_SYSTEM_DT) + +if (DEFINED DEMO_LINK_FLAGS) + set (_deps "${_deps} ${DEMO_LINK_FLAGS}") +endif() + get_property (_linker_options GLOBAL PROPERTY TEST_LINKER_OPTIONS) add_executable (${_app0}.elf ${_src0}) get_property (_ec_flgs GLOBAL PROPERTY "PROJECT_EC_FLAGS") target_compile_options (${_app0}.elf PUBLIC ${_ec_flgs}) target_link_libraries(${_app0}.elf -Wl,-Map=${_app0}.map -Wl,--gc-sections -T\"${_linker_script}\" -Wl,--start-group ${_deps} -Wl,--end-group) install (TARGETS ${_app0}.elf RUNTIME DESTINATION bin) - diff --git a/examples/system/freertos/xlnx_r5/amp_demo/common.h b/examples/system/freertos/xlnx_r5/amp_demo/common.h index d6e29c7a..a4810638 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/common.h +++ b/examples/system/freertos/xlnx_r5/amp_demo/common.h @@ -1,8 +1,9 @@ /* - * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - */ + * Copyright (c) 2017 - 2022, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ #ifndef __COMMON_H__ #define __COMMON_H__ @@ -22,11 +23,46 @@ #include #include "sys_init.h" +#if defined(PLATFORM_ZYNQMP) + +#define TTC0_BASE_ADDR 0xff110000 +#define TTC_DEV_NAME "ff110000.ttc" +#define IPI_MASK 0x1000000 + +#if XPAR_CPU_ID == 0 +#define IPI_DEV_NAME "ff310000.ipi" +#define IPI_BASE_ADDR 0xff310000 +#define IPI_IRQ_VECT_ID 65 +#else +#define IPI_DEV_NAME "ff320000.ipi" +#define IPI_BASE_ADDR 0xff320000 +#define IPI_IRQ_VECT_ID 66 +#endif + +#elif defined(versal) +#define TTC0_BASE_ADDR 0xFF0E0000 +#define IPI_BASE_ADDR 0xFF340000 +#define IPI_IRQ_VECT_ID 63 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "ff0e0000.ttc" +#define IPI_DEV_NAME "ff340000.ipi" + +#elif defined(VERSAL_NET) + +#define TTC0_BASE_ADDR 0xFD1C0000 +#define IPI_BASE_ADDR 0xEB340000 +#define IPI_IRQ_VECT_ID 90 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "fd1c0000.ttc" +#define IPI_DEV_NAME "eb340000.ipi" +#endif + /* Devices names */ #define BUS_NAME "generic" -#define IPI_DEV_NAME "ff310000.ipi" -#define SHM_DEV_NAME "3ed80000.shm" -#define TTC_DEV_NAME "ff110000.ttc" +#define SHM_DEV_NAME "3ed80000.shm" + +#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID +#define SHM_BASE_ADDR 0x3ED80000 /* IPI registers offset */ #define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ @@ -36,9 +72,6 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK 0x1000000 /* IPI mask for kick from APU. - We use PL0 IPI in this demo. 
*/ - /* TTC counter offsets */ #define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ #define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ @@ -160,4 +193,4 @@ static inline void print_demo(char *name) } #endif /* __COMMON_H__ */ - + diff --git a/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld b/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld index 8d2830d5..b8dc60be 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld +++ b/examples/system/freertos/xlnx_r5/amp_demo/lscript.ld @@ -30,8 +30,8 @@ * ******************************************************************************/ -_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x1000; -_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x6000; +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; _ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? _ABORT_STACK_SIZE : 1024; _SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? 
_SUPERVISOR_STACK_SIZE : 2048; @@ -73,23 +73,23 @@ SECTIONS *(.vfp11_veneer) *(.ARM.extab) *(.gnu.linkonce.armextab.*) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_atcm_MEM_0 .init : { KEEP (*(.init)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .fini : { KEEP (*(.fini)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .interp : { KEEP (*(.interp)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .note-ABI-tag : { KEEP (*(.note-ABI-tag)) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .rodata : { __rodata_start = .; @@ -97,14 +97,14 @@ SECTIONS *(.rodata.*) *(.gnu.linkonce.r.*) __rodata_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .rodata1 : { __rodata1_start = .; *(.rodata1) *(.rodata1.*) __rodata1_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sdata2 : { __sdata2_start = .; @@ -112,7 +112,7 @@ SECTIONS *(.sdata2.*) *(.gnu.linkonce.s2.*) __sdata2_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sbss2 : { __sbss2_start = .; @@ -120,7 +120,7 @@ SECTIONS *(.sbss2.*) *(.gnu.linkonce.sb2.*) __sbss2_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .data : { __data_start = .; @@ -131,18 +131,18 @@ SECTIONS *(.got) *(.got.plt) __data_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .data1 : { __data1_start = .; *(.data1) *(.data1.*) __data1_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .got : { *(.got) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .ctors : { __CTOR_LIST__ = .; @@ -153,7 +153,7 @@ SECTIONS KEEP (*(.ctors)) __CTOR_END__ = .; ___CTORS_END___ = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .dtors : { __DTOR_LIST__ = .; @@ -164,67 +164,67 @@ SECTIONS KEEP (*(.dtors)) __DTOR_END__ = .; ___DTORS_END___ = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .fixup : { __fixup_start = .; *(.fixup) __fixup_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .eh_frame : { *(.eh_frame) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .eh_framehdr : { __eh_framehdr_start = .; *(.eh_framehdr) __eh_framehdr_end = .; -} > 
psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .gcc_except_table : { *(.gcc_except_table) -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .mmu_tbl (ALIGN(16384)) : { __mmu_tbl_start = .; *(.mmu_tbl) __mmu_tbl_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .ARM.exidx : { __exidx_start = .; *(.ARM.exidx*) *(.gnu.linkonce.armexidix.*.*) __exidx_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .preinit_array : { __preinit_array_start = .; KEEP (*(SORT(.preinit_array.*))) KEEP (*(.preinit_array)) __preinit_array_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .init_array : { __init_array_start = .; KEEP (*(SORT(.init_array.*))) KEEP (*(.init_array)) __init_array_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .fini_array : { __fini_array_start = .; KEEP (*(SORT(.fini_array.*))) KEEP (*(.fini_array)) __fini_array_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .ARM.attributes : { __ARM.attributes_start = .; *(.ARM.attributes) __ARM.attributes_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sdata : { __sdata_start = .; @@ -232,7 +232,7 @@ SECTIONS *(.sdata.*) *(.gnu.linkonce.s.*) __sdata_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .sbss (NOLOAD) : { __sbss_start = .; @@ -240,7 +240,7 @@ SECTIONS *(.sbss.*) *(.gnu.linkonce.sb.*) __sbss_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .tdata : { __tdata_start = .; @@ -248,7 +248,7 @@ SECTIONS *(.tdata.*) *(.gnu.linkonce.td.*) __tdata_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .tbss : { __tbss_start = .; @@ -256,7 +256,7 @@ SECTIONS *(.tbss.*) *(.gnu.linkonce.tb.*) __tbss_end = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 .bss (NOLOAD) : { . = ALIGN(4); @@ -267,7 +267,7 @@ SECTIONS *(COMMON) . = ALIGN(4); __bss_end__ = .; -} > psu_r5_ddr_0_MEM_0 +} > psu_r5_btcm_MEM_0 _SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); @@ -283,7 +283,7 @@ _SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); . 
+= _HEAP_SIZE; _heap_end = .; HeapLimit = .; -} > psu_r5_atcm_MEM_0 +} > psu_r5_btcm_MEM_0 .stack (NOLOAD) : { . = ALIGN(16); @@ -311,7 +311,7 @@ _SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); . += _UNDEF_STACK_SIZE; . = ALIGN(16); __undef_stack = .; -} > psu_r5_atcm_MEM_0 +} > psu_r5_btcm_MEM_0 _end = .; } diff --git a/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld new file mode 100644 index 00000000..da885b1b --- /dev/null +++ b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt.ld @@ -0,0 +1,294 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? _ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? 
_UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psu_r5_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psu_r5_btcm_MEM_0 : ORIGIN = 0x20000, LENGTH = 0x10000 + psu_r5_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psu_r5_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psu_r5_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psu_r5_btcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psu_r5_btcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psu_r5_btcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psu_r5_btcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psu_r5_btcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psu_r5_btcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psu_r5_btcm_MEM_0 + +.data1 : { + __data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psu_r5_btcm_MEM_0 + +.got : { + *(.got) +} > psu_r5_btcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + 
+.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psu_r5_btcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psu_r5_btcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psu_r5_btcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psu_r5_btcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psu_r5_btcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psu_r5_btcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psu_r5_btcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + *(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psu_r5_btcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psu_r5_btcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psu_r5_btcm_MEM_0 + +.bss (NOLOAD) : { + . 
= ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psu_r5_ddr_0_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psu_r5_btcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psu_r5_btcm_MEM_0 + +_end = .; +} diff --git a/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld new file mode 100644 index 00000000..a570286b --- /dev/null +++ b/examples/system/freertos/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld @@ -0,0 +1,295 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? 
_ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? _UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psx_r52_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psx_r52_btcm_MEM_0 : ORIGIN = 0x10000, LENGTH = 0x8000 + psx_r52_ctcm_MEM_0 : ORIGIN = 0x18000, LENGTH = 0x8000 + psx_r52_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psx_r52_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psx_r52_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psx_r52_atcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psx_r52_atcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psx_r52_atcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psx_r52_atcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psx_r52_atcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psx_r52_atcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psx_r52_atcm_MEM_0 + +.data1 : { + 
__data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psx_r52_atcm_MEM_0 + +.got : { + *(.got) +} > psx_r52_atcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psx_r52_atcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psx_r52_atcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psx_r52_atcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psx_r52_atcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psx_r52_atcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psx_r52_atcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psx_r52_atcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + 
*(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psx_r52_atcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psx_r52_atcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psx_r52_atcm_MEM_0 + +.bss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psx_r52_ddr_0_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psx_r52_atcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psx_r52_atcm_MEM_0 + +_end = .; +} diff --git a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c index 347332d9..fb2e375c 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.c @@ -1,6 +1,6 @@ /****************************************************************************** * - * Copyright (C) 2010 - 2017 Xilinx, Inc. All rights reserved. + * Copyright (c) 2022-2024, Advanced Micro Devices, Inc. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause * @@ -27,14 +27,6 @@ #define UART_BAUD 9600 #endif -#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID - -#define IPI_IRQ_VECT_ID 65 - -#define SHM_BASE_ADDR 0x3ED80000 -#define TTC0_BASE_ADDR 0xFF110000 -#define IPI_BASE_ADDR 0xFF310000 - /* Default generic I/O region page shift */ /* Each I/O region can contain multiple pages. * In FreeRTOS system, the memory mapping is flat, there is no @@ -44,7 +36,12 @@ #define DEFAULT_PAGE_SHIFT (-1UL) #define DEFAULT_PAGE_MASK (-1UL) +#if defined(SDT) +/* System Device Tree (SDT) flow does not provide this symbol. */ +static XScuGic xInterruptController; +#else extern XScuGic xInterruptController; +#endif const metal_phys_addr_t metal_phys[] = { IPI_BASE_ADDR, /**< base IPI address */ diff --git a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h index 38487b66..4fe32873 100644 --- a/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h +++ b/examples/system/freertos/xlnx_r5/amp_demo/sys_init.h @@ -1,6 +1,7 @@ /****************************************************************************** * - * Copyright (C) 2008 - 2014 Xilinx, Inc. All rights reserved. + * Copyright (C) 2017-2022 Xilinx, Inc. All rights reserved. + * Copyright (c) 2022-2024, Advanced Micro Devices, Inc. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause * diff --git a/examples/system/generic/xlnx_r5/CMakeLists.txt b/examples/system/generic/xlnx_r5/CMakeLists.txt index eea72c2a..b7b78064 100644 --- a/examples/system/generic/xlnx_r5/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/CMakeLists.txt @@ -4,17 +4,27 @@ collect(PROJECT_LIB_DEPS xil) collect(PROJECT_LIB_DEPS c) collect(PROJECT_LIB_DEPS m) +get_property (HAS_SYSTEM_DT GLOBAL PROPERTY HAS_SYSTEM_DT) +if (HAS_SYSTEM_DT) + # SDT flow has broken up libxil into multiple libs. Below libs are required + # for compilation. 
+ collect(PROJECT_LIB_DEPS xilstandalone) + collect(PROJECT_LIB_DEPS xiltimer) +endif(HAS_SYSTEM_DT) + set (_lib "xil") if (XILINX_PRE_V2019) list (APPEND _lib "xilmem") list (APPEND _lib "xilstandalone") endif (XILINX_PRE_V2019) find_library (_lib_path ${_lib}) -if (NOT _lib_path) - message ( "external library ${_lib_path} not found" ) - message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) - message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) -endif (NOT _lib_path) +if (NOT CMAKE_C_FLAGS MATCHES "SDT") + if (NOT _lib_path) + message ( "external library ${_lib_path} not found" ) + message ( "hint: you may need to pass -DCMAKE_LIBRARY_PATH=" ) + message ( FATAL_ERROR "library ${_lib} is required to build the examples" ) + endif (NOT _lib_path) +endif (NOT CMAKE_C_FLAGS MATCHES "SDT") get_filename_component (_lib_path ${_lib_path} DIRECTORY) collect (PROJECT_LIB_DIRS ${_lib_path}) diff --git a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt index 2389c7ea..f463bf87 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt +++ b/examples/system/generic/xlnx_r5/amp_demo/CMakeLists.txt @@ -10,7 +10,13 @@ set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript.ld) set (_src_common ${CMAKE_CURRENT_SOURCE_DIR}/init_${PROJECT_SYSTEM}.c) set (_app0 libmetal_amp_demod) -set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/${_app0}.c) + +if (DEFINED CMAKE_PROJECT_NAME) + set (_app0 ${CMAKE_PROJECT_NAME}) +endif(DEFINED CMAKE_PROJECT_NAME) + +set (_src0 ${CMAKE_CURRENT_SOURCE_DIR}/libmetal_amp_demod.c) + list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/sys_init.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demod.c) @@ -19,6 +25,20 @@ list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demod.c) list(APPEND _src0 ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demod.c) list(APPEND _src0 
${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demod.c) +include(CheckSymbolExists) +check_symbol_exists(SDT "bspconfig.h" HAS_SYSTEM_DT) +check_symbol_exists(VERSAL_NET "bspconfig.h" IS_VERSAL_NET) +if (HAS_SYSTEM_DT) + if (IS_VERSAL_NET) + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt_versal_net.ld) + else() + set (_linker_script ${CMAKE_CURRENT_SOURCE_DIR}/lscript_sdt.ld) + endif(IS_VERSAL_NET) +endif(HAS_SYSTEM_DT) + +if (DEFINED DEMO_LINK_FLAGS) + set (_deps "${_deps} ${DEMO_LINK_FLAGS}") +endif() get_property (_linker_options GLOBAL PROPERTY TEST_LINKER_OPTIONS) add_executable (${_app0}.elf ${_src0}) get_property (_ec_flgs GLOBAL PROPERTY "PROJECT_EC_FLAGS") diff --git a/examples/system/generic/xlnx_r5/amp_demo/common.h b/examples/system/generic/xlnx_r5/amp_demo/common.h index ece552c1..3c92e587 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/common.h +++ b/examples/system/generic/xlnx_r5/amp_demo/common.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. - * Copyright (C) 2022, Advanced Micro Devices, Inc. + * Copyright (c) 2017 - 2022, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -24,11 +24,46 @@ #include #include "sys_init.h" +#if defined(PLATFORM_ZYNQMP) + +#define TTC0_BASE_ADDR 0xff110000 +#define TTC_DEV_NAME "ff110000.ttc" +#define IPI_MASK 0x1000000 + +#if XPAR_CPU_ID == 0 +#define IPI_DEV_NAME "ff310000.ipi" +#define IPI_BASE_ADDR 0xff310000 +#define IPI_IRQ_VECT_ID 65 +#else +#define IPI_DEV_NAME "ff320000.ipi" +#define IPI_BASE_ADDR 0xff320000 +#define IPI_IRQ_VECT_ID 66 +#endif + +#elif defined(versal) +#define TTC0_BASE_ADDR 0xFF0E0000 +#define IPI_BASE_ADDR 0xFF340000 +#define IPI_IRQ_VECT_ID 63 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "ff0e0000.ttc" +#define IPI_DEV_NAME "ff340000.ipi" + +#elif defined(VERSAL_NET) + +#define TTC0_BASE_ADDR 0xFD1C0000 +#define IPI_BASE_ADDR 0xEB340000 +#define IPI_IRQ_VECT_ID 90 +#define IPI_MASK 0x0000020 +#define TTC_DEV_NAME "fd1c0000.ttc" +#define IPI_DEV_NAME "eb340000.ipi" +#endif + /* Devices names */ #define BUS_NAME "generic" -#define IPI_DEV_NAME "ff310000.ipi" -#define SHM_DEV_NAME "3ed80000.shm" -#define TTC_DEV_NAME "ff110000.ttc" +#define SHM_DEV_NAME "3ed80000.shm" + +#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID +#define SHM_BASE_ADDR 0x3ED80000 /* IPI registers offset */ #define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ @@ -38,9 +73,6 @@ #define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ #define IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ -#define IPI_MASK 0x1000000 /* IPI mask for kick from APU. - We use PL0 IPI in this demo. 
*/ - /* TTC counter offsets */ #define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ #define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ @@ -172,4 +204,4 @@ static inline void print_demo(char *name) } #endif /* __COMMON_H__ */ - + diff --git a/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld new file mode 100644 index 00000000..f59093c0 --- /dev/null +++ b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt.ld @@ -0,0 +1,294 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? _ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? 
_UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psu_r5_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psu_r5_btcm_MEM_0 : ORIGIN = 0x20000, LENGTH = 0x10000 + psu_r5_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psu_r5_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psu_r5_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psu_r5_btcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psu_r5_btcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psu_r5_btcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psu_r5_btcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psu_r5_btcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psu_r5_btcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psu_r5_btcm_MEM_0 + +.data1 : { + __data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psu_r5_btcm_MEM_0 + +.got : { + *(.got) +} > psu_r5_btcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + 
+.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psu_r5_btcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psu_r5_btcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psu_r5_btcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psu_r5_btcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psu_r5_btcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psu_r5_btcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psu_r5_btcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psu_r5_btcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psu_r5_btcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psu_r5_btcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psu_r5_btcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + *(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psu_r5_btcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psu_r5_btcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psu_r5_btcm_MEM_0 + +.bss (NOLOAD) : { + . 
= ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psu_r5_btcm_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psu_r5_btcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psu_r5_btcm_MEM_0 + +_end = .; +} diff --git a/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld new file mode 100644 index 00000000..b0bf48dc --- /dev/null +++ b/examples/system/generic/xlnx_r5/amp_demo/lscript_sdt_versal_net.ld @@ -0,0 +1,295 @@ +/****************************************************************************** + * + * Copyright (C) 2017 - 2022 Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + * + ******************************************************************************/ + +_STACK_SIZE = DEFINED(_STACK_SIZE) ? _STACK_SIZE : 0x2000; +_HEAP_SIZE = DEFINED(_HEAP_SIZE) ? _HEAP_SIZE : 0x4000; + +_ABORT_STACK_SIZE = DEFINED(_ABORT_STACK_SIZE) ? 
_ABORT_STACK_SIZE : 1024; +_SUPERVISOR_STACK_SIZE = DEFINED(_SUPERVISOR_STACK_SIZE) ? _SUPERVISOR_STACK_SIZE : 2048; +_IRQ_STACK_SIZE = DEFINED(_IRQ_STACK_SIZE) ? _IRQ_STACK_SIZE : 1024; +_FIQ_STACK_SIZE = DEFINED(_FIQ_STACK_SIZE) ? _FIQ_STACK_SIZE : 1024; +_UNDEF_STACK_SIZE = DEFINED(_UNDEF_STACK_SIZE) ? _UNDEF_STACK_SIZE : 1024; + +/* Define Memories in the system */ + +MEMORY +{ + psx_r52_atcm_MEM_0 : ORIGIN = 0x0, LENGTH = 0x10000 + psx_r52_btcm_MEM_0 : ORIGIN = 0x10000, LENGTH = 0x8000 + psx_r52_ctcm_MEM_0 : ORIGIN = 0x18000, LENGTH = 0x8000 + psx_r52_ddr_0_MEM_0 : ORIGIN = 0x3ed00000, LENGTH = 0x80000 +} + +/* Specify the default entry point to the program */ + +ENTRY(_vector_table) + +/* Define the sections, and where they are mapped in memory */ + +SECTIONS +{ +.vectors : { + KEEP (*(.vectors)) + *(.boot) +} > psx_r52_atcm_MEM_0 + +.text : { + *(.text) + *(.text.*) + *(.gnu.linkonce.t.*) + *(.plt) + *(.gnu_warning) + *(.gcc_execpt_table) + *(.glue_7) + *(.glue_7t) + *(.vfp11_veneer) + *(.ARM.extab) + *(.gnu.linkonce.armextab.*) +} > psx_r52_ddr_0_MEM_0 + +.init : { + KEEP (*(.init)) +} > psx_r52_atcm_MEM_0 + +.fini : { + KEEP (*(.fini)) +} > psx_r52_atcm_MEM_0 + +.interp : { + KEEP (*(.interp)) +} > psx_r52_atcm_MEM_0 + +.note-ABI-tag : { + KEEP (*(.note-ABI-tag)) +} > psx_r52_atcm_MEM_0 + +.rodata : { + __rodata_start = .; + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + __rodata_end = .; +} > psx_r52_atcm_MEM_0 + +.rodata1 : { + __rodata1_start = .; + *(.rodata1) + *(.rodata1.*) + __rodata1_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata2 : { + __sdata2_start = .; + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + __sdata2_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss2 : { + __sbss2_start = .; + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + __sbss2_end = .; +} > psx_r52_atcm_MEM_0 + +.data : { + __data_start = .; + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + *(.jcr) + *(.got) + *(.got.plt) + __data_end = .; +} > psx_r52_atcm_MEM_0 + +.data1 : { + 
__data1_start = .; + *(.data1) + *(.data1.*) + __data1_end = .; +} > psx_r52_atcm_MEM_0 + +.got : { + *(.got) +} > psx_r52_atcm_MEM_0 + +.ctors : { + __CTOR_LIST__ = .; + ___CTORS_LIST___ = .; + KEEP (*crtbegin.o(.ctors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + __CTOR_END__ = .; + ___CTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.dtors : { + __DTOR_LIST__ = .; + ___DTORS_LIST___ = .; + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE(*crtend.o) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + __DTOR_END__ = .; + ___DTORS_END___ = .; +} > psx_r52_atcm_MEM_0 + +.fixup : { + __fixup_start = .; + *(.fixup) + __fixup_end = .; +} > psx_r52_atcm_MEM_0 + +.eh_frame : { + *(.eh_frame) +} > psx_r52_atcm_MEM_0 + +.eh_framehdr : { + __eh_framehdr_start = .; + *(.eh_framehdr) + __eh_framehdr_end = .; +} > psx_r52_atcm_MEM_0 + +.gcc_except_table : { + *(.gcc_except_table) +} > psx_r52_atcm_MEM_0 + +.mmu_tbl (ALIGN(16384)) : { + __mmu_tbl_start = .; + *(.mmu_tbl) + __mmu_tbl_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.exidx : { + __exidx_start = .; + *(.ARM.exidx*) + *(.gnu.linkonce.armexidix.*.*) + __exidx_end = .; +} > psx_r52_atcm_MEM_0 + +.preinit_array : { + __preinit_array_start = .; + KEEP (*(SORT(.preinit_array.*))) + KEEP (*(.preinit_array)) + __preinit_array_end = .; +} > psx_r52_atcm_MEM_0 + +.init_array : { + __init_array_start = .; + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + __init_array_end = .; +} > psx_r52_atcm_MEM_0 + +.fini_array : { + __fini_array_start = .; + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + __fini_array_end = .; +} > psx_r52_atcm_MEM_0 + +.ARM.attributes : { + __ARM.attributes_start = .; + *(.ARM.attributes) + __ARM.attributes_end = .; +} > psx_r52_atcm_MEM_0 + +.sdata : { + __sdata_start = .; + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + __sdata_end = .; +} > psx_r52_atcm_MEM_0 + +.sbss (NOLOAD) : { + __sbss_start = .; + *(.sbss) + *(.sbss.*) + 
*(.gnu.linkonce.sb.*) + __sbss_end = .; +} > psx_r52_atcm_MEM_0 + +.tdata : { + __tdata_start = .; + *(.tdata) + *(.tdata.*) + *(.gnu.linkonce.td.*) + __tdata_end = .; +} > psx_r52_atcm_MEM_0 + +.tbss : { + __tbss_start = .; + *(.tbss) + *(.tbss.*) + *(.gnu.linkonce.tb.*) + __tbss_end = .; +} > psx_r52_atcm_MEM_0 + +.bss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(.gnu.linkonce.b.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; +} > psx_r52_atcm_MEM_0 + +_SDA_BASE_ = __sdata_start + ((__sbss_end - __sdata_start) / 2 ); + +_SDA2_BASE_ = __sdata2_start + ((__sbss2_end - __sdata2_start) / 2 ); + +/* Generate Stack and Heap definitions */ + +.heap (NOLOAD) : { + . = ALIGN(16); + _heap = .; + HeapBase = .; + _heap_start = .; + . += _HEAP_SIZE; + _heap_end = .; + HeapLimit = .; +} > psx_r52_atcm_MEM_0 + +.stack (NOLOAD) : { + . = ALIGN(16); + _stack_end = .; + . += _STACK_SIZE; + _stack = .; + __stack = _stack; + . = ALIGN(16); + _irq_stack_end = .; + . += _IRQ_STACK_SIZE; + __irq_stack = .; + _supervisor_stack_end = .; + . += _SUPERVISOR_STACK_SIZE; + . = ALIGN(16); + __supervisor_stack = .; + _abort_stack_end = .; + . += _ABORT_STACK_SIZE; + . = ALIGN(16); + __abort_stack = .; + _fiq_stack_end = .; + . += _FIQ_STACK_SIZE; + . = ALIGN(16); + __fiq_stack = .; + _undef_stack_end = .; + . += _UNDEF_STACK_SIZE; + . = ALIGN(16); + __undef_stack = .; +} > psx_r52_atcm_MEM_0 + +_end = .; +} diff --git a/examples/system/generic/xlnx_r5/amp_demo/sys_init.c b/examples/system/generic/xlnx_r5/amp_demo/sys_init.c index 0915ebdc..043a7cc2 100644 --- a/examples/system/generic/xlnx_r5/amp_demo/sys_init.c +++ b/examples/system/generic/xlnx_r5/amp_demo/sys_init.c @@ -1,6 +1,7 @@ /****************************************************************************** * - * Copyright (C) 2017 Xilinx, Inc. All rights reserved. + * Copyright (C) 2017 - 2022, Xilinx, Inc. All rights reserved. + * Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. 
* * SPDX-License-Identifier: BSD-3-Clause * @@ -27,14 +28,6 @@ #define UART_BAUD 9600 #endif -#define INTC_DEVICE_ID XPAR_SCUGIC_0_DEVICE_ID - -#define IPI_IRQ_VECT_ID 65 - -#define SHM_BASE_ADDR 0x3ED80000 -#define TTC0_BASE_ADDR 0xFF110000 -#define IPI_BASE_ADDR 0xFF310000 - /* Default generic I/O region page shift */ /* Each I/O region can contain multiple pages. * In baremetal system, the memory mapping is flat, there is no diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt new file mode 100644 index 00000000..d2b7c160 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/CMakeLists.txt @@ -0,0 +1,71 @@ +collector_list (_list PROJECT_INC_DIRS) +include_directories (${_list} ${CMAKE_CURRENT_SOURCE_DIR}) + +collector_list (_list PROJECT_LIB_DIRS) +link_directories (${_list}) + +collector_list (_deps PROJECT_LIB_DEPS) + +set (_src_common ${CMAKE_CURRENT_SOURCE_DIR}/sys_init.c) +get_property (_ec_flgs GLOBAL PROPERTY "PROJECT_EC_FLAGS") +foreach (_app libmetal_amp_demo libmetal_amp_demod) + set (_src ${CMAKE_CURRENT_SOURCE_DIR}/${_app}.c) + list(APPEND _src ${_src_common}) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi-uio.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demo.c) + if (WITH_SHARED_LIB) + add_executable (${_app}-share ${_src}) + if (PROJECT_EC_FLAGS) + string(REPLACE " " ";" _ec_flgs ${PROJECT_EC_FLAGS}) + target_compile_options (${_app}-share PUBLIC ${_ec_flgs}) + endif (PROJECT_EC_FLAGS) + target_link_libraries (${_app}-share ${PROJECT_NAME}-shared ${_deps}) + install (TARGETS ${_app}-share RUNTIME DESTINATION 
bin) + add_dependencies (${_app}-share ${PROJECT_NAME}-shared) + endif (WITH_SHARED_LIB) + + if (WITH_STATIC_LIB) + if (${PROJECT_SYSTEM} STREQUAL "linux") + add_executable (${_app}-static ${_src}) + if (PROJECT_EC_FLAGS) + string(REPLACE " " ";" _ec_flgs ${PROJECT_EC_FLAGS}) + target_compile_options (${_app}-static PUBLIC ${_ec_flgs}) + endif (PROJECT_EC_FLAGS) + target_link_libraries (${_app}-static ${PROJECT_NAME}-static ${_deps}) + install (TARGETS ${_app}-static RUNTIME DESTINATION bin) + endif (${PROJECT_SYSTEM} STREQUAL "linux") + endif (WITH_STATIC_LIB) +endforeach (_app) + +foreach (_app libmetal_amp_mb_shmem) + set (_src ${CMAKE_CURRENT_SOURCE_DIR}/libmetal_amp_demo.c) + list(APPEND _src ${_src_common}) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi-mb.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_atomic_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_shmem_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/ipi_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_latency_demo.c) + list(APPEND _src ${CMAKE_CURRENT_SOURCE_DIR}/shmem_throughput_demo.c) + if (WITH_SHARED_LIB) + add_executable (${_app}-share ${_src}) + target_compile_options (${_app}-share PUBLIC ${_ec_flgs}) + target_link_libraries (${_app}-share ${PROJECT_NAME}-shared ${_deps}) + install (TARGETS ${_app}-share RUNTIME DESTINATION bin) + add_dependencies (${_app}-share ${PROJECT_NAME}-shared) + endif (WITH_SHARED_LIB) + + if (WITH_STATIC_LIB) + if (${PROJECT_SYSTEM} STREQUAL "linux") + add_executable (${_app}-static ${_src}) + target_compile_options (${_app}-static PUBLIC ${_ec_flgs}) + target_link_libraries (${_app}-static ${PROJECT_NAME}-static ${_deps}) + install (TARGETS ${_app}-static RUNTIME DESTINATION bin) + endif (${PROJECT_SYSTEM} STREQUAL "linux") + endif (WITH_STATIC_LIB) +endforeach (_app) diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h 
b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h new file mode 100644 index 00000000..936ea37b --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/common.h @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include +#include +#include +#include +#include + +#define BUS_NAME "platform" + +#ifndef SHM_DEV_NAME +#define SHM_DEV_NAME "3ed80000.shm" +#endif /* !SHM_DEV_NAME */ + +#if defined(PLATFORM_ZYNQMP) + +#ifndef IPI_DEV_NAME +#define IPI_DEV_NAME "ff340000.ipi" +#endif /* !IPI_DEV_NAME */ + +#ifndef TTC_DEV_NAME +#define TTC_DEV_NAME "ff110000.timer" +#endif /* !TTC_DEV_NAME */ + +#ifndef IPI_MASK +#define IPI_MASK 0x100 +#endif /* !IPI_MASK */ + +#elif defined(versal) + +#ifndef IPI_DEV_NAME +#define IPI_DEV_NAME "ff360000.ipi" +#endif /* !IPI_DEV_NAME */ + +#ifndef IPI_MASK +#define IPI_MASK 0x08 +#endif /* !IPI_MASK */ + +#ifndef TTC_DEV_NAME +#define TTC_DEV_NAME "ff0e0000.ttc0" +#endif /* !TTC_DEV_NAME */ + +#elif defined(VERSAL_NET) + +#ifndef IPI_DEV_NAME +#define IPI_DEV_NAME "eb360000.ipi" +#endif /* !IPI_DEV_NAME */ + +#ifndef IPI_MASK +#define IPI_MASK 0x08 +#endif /* !IPI_MASK */ + +#ifndef TTC_DEV_NAME +#define TTC_DEV_NAME "fd1c0000.ttc0" +#endif /* !TTC_DEV_NAME */ + +#endif + +/* Apply this snippet to the device tree in an overlay so that + * Linux userspace can see and use TTC0: + * &TTC0 { + * compatible = "ttc0_libmetal_demo"; + * status = "okay"; + * }; + */ + + +/* IPI registers offset */ +#define IPI_TRIG_OFFSET 0x0 /* IPI trigger reg offset */ +#define IPI_OBS_OFFSET 0x4 /* IPI observation reg offset */ +#define IPI_ISR_OFFSET 0x10 /* IPI interrupt status reg offset */ +#define IPI_IMR_OFFSET 0x14 /* IPI interrupt mask reg offset */ +#define IPI_IER_OFFSET 0x18 /* IPI interrupt enable reg offset */ +#define
IPI_IDR_OFFSET 0x1C /* IPI interrupt disable reg offset */ + +/* TTC counter offsets */ +#define XTTCPS_CLK_CNTRL_OFFSET 0x0 /* TTC counter clock control reg offset */ +#define XTTCPS_CNT_CNTRL_OFFSET 0xC /* TTC counter control reg offset */ +#define XTTCPS_CNT_VAL_OFFSET 0x18 /* TTC counter val reg offset */ +#define XTTCPS_CNT_OFFSET(ID) ((ID) == 1 ? 0 : 1 << (ID)) /* TTC counter offset + ID is from 1 to 3 */ + +/* TTC counter control masks */ +#define XTTCPS_CNT_CNTRL_RST_MASK 0x10U /* TTC counter control reset mask */ +#define XTTCPS_CNT_CNTRL_DIS_MASK 0x01U /* TTC counter control disable mask */ + +#define LPRINTF(format, ...) \ + printf("CLIENT> " format, ##__VA_ARGS__) + +#define LPERROR(format, ...) LPRINTF("ERROR: " format, ##__VA_ARGS__) + +/** + * @brief shmem_demo() - Show use of shared memory with Libmetal. + * For NUM_TIMES times, send message to RPU and notify RPU by writing to + * share mem that RPU is polling. Once detected, RPU will then similarly + * write message and notify APU and the APU will then verify the + * response. If the message does not match expected response, record + * error. Afterwards, report test result and clean up. + * Notes: + * * The RPU will repeatedly wait for shared mem. from APU until APU + * notifies remote by changing the KEEP_GOING value in shared memory. + * + * @return - return 0 on success, otherwise return error number indicating + * type of error + */ +int shmem_demo(); + +/** + * @brief ipi_shmem_demo() - shared memory IPI demo + * This task will: + * * Get the timestamp and put it into the ping shared memory + * * Update the shared memory descriptor for the new available + * ping buffer. + * * Trigger IPI to notify the remote. + * * Repeat the above steps until it sends out all the packages. + * * Monitor IPI interrupt, verify every received package. + * * After all the packages are received, it sends out shutdown + * message to the remote.
+ * + * @return - return 0 on success, otherwise return error number indicating + * type of error. + */ +int ipi_shmem_demo(); + +/** + * @brief atomic_shmem_demo() - Shared memory atomic operation demo + * This task will: + * - Write to shared memory to notify the remote to start atomic add + * on the shared memory descriptor memory for 1000 times. + * - Start atomic add by 1 for 1000 times to first 32 bits of memory + * in the shared memory location at 3ed00000 which is + * pointed to by shm_io. + * - Wait for the remote to write to shared memory + * - Once it received the polling kick from the remote, it will check + * if the value stored in the shared memory for the atomic add is + * 2000. + * - It will print if the atomic add test has passed or not. + * + * @param[in] channel- hold shared mem. device + * @return - If setup failed, return the corresponding error number. Otherwise + * return 0 on success. + */ +int atomic_shmem_demo(); + +/** + * @brief ipi_latency_demo() - Show performance of IPI with Libmetal. + * For NUM_TIMES times, repeatedly send an IPI from APU and then detect + * this IPI from RPU and measure the latency. Similarly, measure the + * latency from RPU to APU. Each iteration, record this latency and + * after the loop has finished, report the total latency in nanoseconds. + * Notes: + * * The RPU will repeatedly wait for IPI from APU until APU notifies + * remote by changing the KEEPGOING value in shared memory. + * * To further ensure the accuracy of the readings a different thread + * (i.e. the IRQ handler) will stop the timer measuring RPU to APU + * latency. + * + * @return - 0 on success, error code if failure. + */ +int ipi_latency_demo(); + +/** + * @brief shmem_latency_demo() - Show performance of shared memory + * For 8, 512, and 1024 bytes, measure latency from block write to block + * read on remote side in shared memory.
For each size, find average + * latency by running NUM_TIMES times and reporting the average latency + * for both APU block write to RPU block read as well as RPU block write + * to APU block read. + * + * @return - 0 on success, error code if failure. + */ +int shmem_latency_demo(); + +/** + * @brief shmem_throughput_demo_demo() - Show performance of shared memory + * Record average throughput for APU block read, write, RPU block read + * and write for sizes 1/2KB, 1KB and 2KB. For each size, run 1000 times + * each operation and record average. + * + * @return - 0 on success, error code if failure. + */ +int shmem_throughput_demo(); + +/** + * @brief wait_for_notified() - Loop until notified bit in channel is set. + * + * @param[in] notified - pointer to the notified variable + */ +static inline void wait_for_notified(atomic_flag *notified) +{ + unsigned int flags; + + do { + + flags = metal_irq_save_disable(); + if (!atomic_flag_test_and_set(notified)) { + metal_irq_restore_enable(flags); + break; + } + metal_cpu_yield(); + metal_irq_restore_enable(flags); + } while(1); +} + +/** + * @brief dump_buffer() - print hex value of each byte in the buffer + * + * @param[in] buf - pointer to the buffer + * @param[in] len - len of the buffer + */ +static inline void dump_buffer(void *buf, unsigned int len) +{ + unsigned int i; + unsigned char *tmp = (unsigned char *)buf; + + for (i = 0; i < len; i++) { + printf(" %02x", *(tmp++)); + if (!(i % 20)) + printf("\n"); + } + printf("\n"); +} + +/** + * @brief print_demo() - print demo string + * + * @param[in] name - demo name + */ +static inline void print_demo(char *name) +{ + LPRINTF("****** libmetal demo: %s ******\n", name); +} + +/** + * @brief ipi_kick_register_handler() - register for IPI kick handler + * + * @param[in] hd - handler function + * @param[in] priv - private data will be passed to the handler + */ +void ipi_kick_register_handler(metal_irq_handler hd, void *priv); + +/** + * @brief init_ipi() - Initialize 
IPI + * + * @return return 0 for success, negative value for failure. + */ +int init_ipi(void); + +/** + * @brief deinit_ipi() - Deinitialize IPI + */ +void deinit_ipi(void); + +/** + * @brief kick_ipi() - kick remote with IPI + */ +void kick_ipi(void *msg); + +/** + * @brief disable_ipi_kick() - disable IPI interrupt from remote kick + */ +void disable_ipi_kick(void); + +/** + * @brief enable_ipi_kick() - enable IPI interrupt from remote kick + */ +void enable_ipi_kick(void); + +/** + * basic statistics + */ +struct metal_stat { + uint64_t st_cnt; + uint64_t st_sum; + uint64_t st_min; + uint64_t st_max; +}; +#define STAT_INIT { .st_cnt = 0, .st_sum = 0, .st_min = ~0UL, .st_max = 0, } + +/** + * @brief update_stat() - update basic statistics + * + * @param[in] pst - pointer to the struct stat + * @param[in] val - the value for the update + */ +static inline void update_stat(struct metal_stat *pst, uint64_t val) +{ + pst->st_cnt++; + pst->st_sum += val; + if (pst->st_min > val) + pst->st_min = val; + if (pst->st_max < val) + pst->st_max = val; +} + +#endif /* __COMMON_H__ */ diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c new file mode 100644 index 00000000..11c952cf --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-mb.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_latency_demo.c + * This demo measures the IPI latency between the APU and RPU. + * This demo does the following steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5.
Reset the APU to RPU TTC counter and then kick IPI to notify the + * remote. + * 6. When it receives IPI interrupt, the IPI interrupt handler to stop + * the RPU to APU TTC counter. + * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define RPROC_SYSFS "/sys/class/remoteproc/remoteproc0" +#define RPROC_SYSFS_KICK RPROC_SYSFS"/kick" +#define RPROC_SYSFS_REMOTE_KICK RPROC_SYSFS"/remote_kick" + +struct ipi_channel { + char *kickf; /**< IPI kick sysfs path */ + char *rkickf; /**< IPI remote kick sysfs path */ + int rkick_fd; /**< IPI remote kick file descriptor */ + atomic_int ipi_enabled; /**< flag to indicate if IPI is enabled */ + atomic_int ipi_handling_wakeup; /**< IPI handling wakeup */ + metal_irq_handler ipi_kick_cb; /**< IPI kick callback */ + void *ipi_kick_priv; /**< IPI kick callback private data */ +}; + +static struct ipi_channel ipi_chnl; +static pthread_t ipi_pthread; +static int ipi_handling_stop_fd; + +/** + * @brief ipi_kick_handling() - IPI kick handling + * + * It will listening to the remote kick sysfs file. + * It will call the registered handler if there is a kick from the + * remote. + * + * @param[in] args - argument of pthread. + * @return NULL. 
+ * + */ +static void *_ipi_kick_handling (void *args) +{ + int rkick_fd; + sigset_t set; + + (void)args; + /* unblock all signals */ + sigfillset(&set); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); + rkick_fd = ipi_chnl.rkick_fd; + while(1) { + char val[2] = {0, 0}; + int ret; + + lseek(rkick_fd, 0, SEEK_SET); + ret = read(rkick_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to read %s.\n", ipi_chnl.rkickf); + return NULL; + } + if (val[0] == '1') { + if (atomic_load(&ipi_chnl.ipi_enabled) == 0) { + int v; + + v = atomic_load(&ipi_chnl.ipi_handling_wakeup); + syscall(SYS_futex, + &ipi_chnl.ipi_handling_wakeup, + FUTEX_WAIT, v, NULL, NULL, 0); + } + if (ipi_chnl.ipi_kick_cb) { + ipi_chnl.ipi_kick_cb(rkick_fd, + ipi_chnl.ipi_kick_priv); + } + lseek(rkick_fd, 0, SEEK_SET); + ret = write(rkick_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to write %s.\n", + ipi_chnl.rkickf); + return NULL; + } + } else { + struct pollfd fds[2]; + + fds[0].fd = rkick_fd; + fds[0].events = POLLPRI; + fds[1].fd = ipi_handling_stop_fd; + fds[1].events = POLLIN; + ret = poll(fds, 2, -1); + if (ret == 0) { + LPERROR("polling %s times out.\n", + ipi_chnl.rkickf); + return NULL; + } else if (ret < 0) { + LPERROR("polling %s failed, %s.\n", + ipi_chnl.rkickf, strerror(ret)); + return NULL; + } else if (fds[1].revents & POLLIN) { + return NULL; + + } + } + } +} + +static void _enable_ipi_intr(struct ipi_channel *chnl) +{ + int exp = 0; + + atomic_compare_exchange_strong(&chnl->ipi_enabled, &exp, 1); + if (exp == 0) { + atomic_fetch_add(&chnl->ipi_handling_wakeup, 1); + syscall(SYS_futex, &chnl->ipi_handling_wakeup, FUTEX_WAKE, + 1, NULL, NULL, 0); + } +} + +static void _disable_ipi_intr(struct ipi_channel *chnl) +{ + atomic_store(&chnl->ipi_enabled, 0); +} + +void ipi_kick_register_handler(metal_irq_handler hd, void *priv) +{ + ipi_chnl.ipi_kick_cb = hd; + ipi_chnl.ipi_kick_priv = priv; +} + +int init_ipi(void) +{ + int ret; + int val = 0; +
atomic_init(&ipi_chnl.ipi_enabled, 0); + atomic_init(&ipi_chnl.ipi_handling_wakeup, 0); + ipi_chnl.kickf = RPROC_SYSFS_KICK; + ipi_chnl.rkickf = RPROC_SYSFS_REMOTE_KICK; + + ret = open(ipi_chnl.rkickf, O_RDWR); + if (ret < 0) { + LPERROR("failed to open %s, %s.\n", + ipi_chnl.rkickf, strerror(ret)); + return ret; + } + ipi_chnl.rkick_fd = ret; + /** Write to the remote kick fd to clear pending IPI interrupt */ + ret = write(ipi_chnl.rkick_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to write to %s, %s.\n", + ipi_chnl.rkickf, strerror(ret)); + close(ipi_chnl.rkick_fd); + return ret; + } + + ipi_handling_stop_fd = eventfd(0,0); + if (ipi_handling_stop_fd < 0) { + LPERROR("failed to create ipi handling stop fd.\n"); + close(ipi_chnl.rkick_fd); + return ipi_handling_stop_fd; + } + + ret = pthread_create(&ipi_pthread, NULL, _ipi_kick_handling, NULL); + if (ret < 0) { + LPERROR("failed to create IPI handling thread.\n"); + close(ipi_chnl.rkick_fd); + close(ipi_handling_stop_fd); + return ret; + } + return 0; +} + +void deinit_ipi(void) +{ + int ret; + uint64_t val = 1; + + ret = write(ipi_handling_stop_fd, &val, sizeof(val)); + if (ret < 0) { + LPERROR("failed to notify deinit ipi, %s.\n", strerror(ret)); + } + atomic_fetch_add(&ipi_chnl.ipi_handling_wakeup, 1); + syscall(SYS_futex, &ipi_chnl.ipi_handling_wakeup, FUTEX_WAKE, + 1, NULL, NULL, 0); + ret = pthread_join(ipi_pthread, NULL); + if (ret) { + LPERROR("failed to join IPI thread: %d.\n", ret); + } + close(ipi_handling_stop_fd); + close(ipi_chnl.rkick_fd); +} + +void kick_ipi(void *msg) +{ + int fd, ret; + int buf = -1; + + (void)msg; + fd = open(ipi_chnl.kickf, O_WRONLY); + if (fd < 0) { + LPERROR("failed to open %s.\n", ipi_chnl.kickf); + return; + } + + ret = write(fd, &buf, sizeof(buf)); + if (ret <= 0) + LPERROR("failed to write to %s, %s.\n", + ipi_chnl.kickf, strerror(ret)); + close(fd); +} + +void disable_ipi_kick(void) +{ + _disable_ipi_intr(&ipi_chnl); +} + +void enable_ipi_kick(void) +{ + 
_enable_ipi_intr(&ipi_chnl); +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c new file mode 100644 index 00000000..c807b803 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi-uio.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_latency_demo.c + * This demo measures the IPI latency between the APU and RPU. + * This demo does the follwing steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5. Reset the APU to RPU TTC counter and then kick IPI to notify the + * remote. + * 6. When it receives IPI interrupt, the IPI interrupt handler to stop + * the RPU to APU TTC counter. + * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +struct ipi_channel { + struct metal_device *ipi_dev; /**< ipi metal device */ + struct metal_io_region *ipi_io; /**< ipi metal I/O region */ + int ipi_irq; /**< ipi irq id */ + uint32_t ipi_mask; /**< remote IPI mask */ + metal_irq_handler ipi_kick_cb; /**< IPI kick callback */ + void *ipi_kick_priv; /**< IPI kick callback private data */ +}; + +static struct ipi_channel ipi_chnl; + +/** + * @brief ipi_irq_handler() - IPI interrupt handler + * It will clear the notified flag to mark it's got an IPI interrupt. 
+ * It will stop the RPU->APU timer and will clear the notified + * flag to mark it's got an IPI interrupt + * + * @param[in] vect_id - IPI interrupt vector ID + * @param[in/out] priv - communication channel data for this application. + * + * @return - If the IPI interrupt is triggered by its remote, it returns + * METAL_IRQ_HANDLED. It returns METAL_IRQ_NOT_HANDLED, if it is + * not the interrupt it expected. + * + */ +static int _ipi_irq_handler (int vect_id, void *priv) +{ + uint32_t val; + struct ipi_channel *chnl = (struct ipi_channel *)priv; + struct metal_io_region *io; + + (void)vect_id; + + io = chnl->ipi_io; + val = metal_io_read32(io, IPI_ISR_OFFSET); + if (val & chnl->ipi_mask) { + if (chnl->ipi_kick_cb != NULL) + chnl->ipi_kick_cb(vect_id, chnl->ipi_kick_priv); + metal_io_write32(io, IPI_ISR_OFFSET, + chnl->ipi_mask); + return METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +static void _enable_ipi_intr(struct ipi_channel *chnl) +{ + metal_irq_enable(chnl->ipi_irq); + /* Enable IPI interrupt */ + metal_io_write32(chnl->ipi_io, IPI_IER_OFFSET, chnl->ipi_mask); +} + +static void _disable_ipi_intr(struct ipi_channel *chnl) +{ + /* disable IPI interrupt */ + metal_io_write32(chnl->ipi_io, IPI_IDR_OFFSET, chnl->ipi_mask); + metal_irq_disable(ipi_chnl.ipi_irq); +} + +void ipi_kick_register_handler(metal_irq_handler hd, void *priv) +{ + ipi_chnl.ipi_kick_cb = hd; + ipi_chnl.ipi_kick_priv = priv; +} +int init_ipi(void) +{ + struct metal_device *dev; + struct metal_io_region *io; + int ret; + + /* Open IPI device */ + ret = metal_device_open(BUS_NAME, IPI_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", IPI_DEV_NAME); + return ret; + } + + /* Get IPI device IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + metal_device_close(dev); + return ret; + } + ipi_chnl.ipi_dev = dev; + ipi_chnl.ipi_io = io; + + /* Get the IPI IRQ from the opened 
IPI device */ + ipi_chnl.ipi_irq = (intptr_t)dev->irq_info; + + ipi_chnl.ipi_mask = IPI_MASK; + /* disable IPI interrupt */ + _disable_ipi_intr(&ipi_chnl); + /* clear old IPI interrupt */ + metal_io_write32(io, IPI_ISR_OFFSET, IPI_MASK); + /* Register IPI irq handler */ + metal_irq_register(ipi_chnl.ipi_irq, _ipi_irq_handler, &ipi_chnl); + return 0; +} + +void deinit_ipi(void) +{ + /* disable IPI interrupt */ + _disable_ipi_intr(&ipi_chnl); + /* unregister IPI irq handler by setting the handler to 0 */ + metal_irq_unregister(ipi_chnl.ipi_irq); + if (ipi_chnl.ipi_dev) { + metal_device_close(ipi_chnl.ipi_dev); + ipi_chnl.ipi_dev = NULL; + } +} + +void kick_ipi(void *msg) +{ + (void)msg; + metal_io_write32(ipi_chnl.ipi_io, IPI_TRIG_OFFSET, ipi_chnl.ipi_mask); +} + +void disable_ipi_kick(void) +{ + _disable_ipi_intr(&ipi_chnl); +} +void enable_ipi_kick(void) +{ + _enable_ipi_intr(&ipi_chnl); +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c new file mode 100644 index 00000000..1f7f45cf --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_latency_demo.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_latency_demo.c + * This demo measures the IPI latency between the APU and RPU. + * This demo does the following steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5. Reset the APU to RPU TTC counter and then kick IPI to notify the + * remote. + * 6. 
When it receives IPI interrupt, the IPI interrupt handler to stop + * the RPU to APU TTC counter. + * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */ +#define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */ + +#define TTC_CLK_FREQ_HZ 100000000 +#define NS_PER_SEC 1000000000 +#define NS_PER_TTC_TICK (NS_PER_SEC / TTC_CLK_FREQ_HZ) + +/* Shared memory offset */ +#define SHM_DEMO_CNTRL_OFFSET 0x0 + +#define DEMO_STATUS_IDLE 0x0 +#define DEMO_STATUS_START 0x1 /* Status value to indicate demo start */ + +#define ITERATIONS 1000 + +struct channel_s { + struct metal_device *shm_dev; /* Shared memory metal device */ + struct metal_io_region *shm_io; /* Shared memory metal i/o region */ + struct metal_device *ttc_dev; /* TTC metal device */ + struct metal_io_region *ttc_io; /* TTC metal i/o region */ + atomic_flag remote_nkicked; /* 0 - kicked from remote */ +}; + +/** + * @brief read_timer() - return TTC counter value + * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter ID + */ +static inline uint32_t read_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + unsigned long offset = XTTCPS_CNT_VAL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + return metal_io_read32(ttc_io, offset); +} + +/** + * @brief reset_timer() - function to reset TTC counter + * Set the RST bit in the Count Control Reg. 
+ * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter id + */ +static inline void reset_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + uint32_t val; + unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + val = XTTCPS_CNT_CNTRL_RST_MASK; + metal_io_write32(ttc_io, offset, val); +} + +/** + * @brief stop_timer() - function to stop TTC counter + * Set the disable bit in the Count Control Reg. + * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter id + */ +static inline void stop_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + uint32_t val; + unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + val = XTTCPS_CNT_CNTRL_DIS_MASK; + metal_io_write32(ttc_io, offset, val); +} + +/** + * @brief ipi_irq_handler() - IPI interrupt handler + * It will clear the notified flag to mark it's got an IPI interrupt. + * It will stop the RPU->APU timer and will clear the notified + * flag to mark it's got an IPI interrupt + * + * @param[in] vect_id - IPI interrupt vector ID + * @param[in/out] priv - communication channel data for this application. + * + * @return - If the IPI interrupt is triggered by its remote, it returns + * METAL_IRQ_HANDLED. It returns METAL_IRQ_NOT_HANDLED, if it is + * not the interrupt it expected. + * + */ +static int ipi_irq_handler (int vect_id, void *priv) +{ + struct channel_s *ch = (struct channel_s *)priv; + + (void)vect_id; + + if (ch) { + /* stop RPU -> APU timer */ + stop_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU); + atomic_flag_clear(&ch->remote_nkicked); + return METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +/** + * @brief ttc_vs_clock_gettime() sanity check: TTC and CLOCK_MONOTONIC + * Compare TTC counts with the CLOCK_MONOTONIC over sleep(1). + * They should be very close, e.g. 
within 6 us for 100 MHz TTC + * + * @param[in] ch - channel information for the ttc timer + */

static void ttc_vs_clock_gettime(struct channel_s *ch)
{
	uint64_t ttc, lnx = metal_get_timestamp();

	reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
	sleep(1);
	stop_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
	lnx = metal_get_timestamp() - lnx;
	/*
	 * Widen before multiplying: read_timer() returns 32 bits, and
	 * ticks * NS_PER_TTC_TICK would otherwise wrap in 32-bit arithmetic.
	 */
	ttc = (uint64_t)NS_PER_TTC_TICK *
	      read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
	/* Cast to unsigned long long so the format matches on every ABI */
	LPRINTF("sleep(1) check: TTC= %llu / CLOCK_MONOTONIC= %llu = %.2f\n",
		(unsigned long long)ttc, (unsigned long long)lnx,
		lnx ? ((double)ttc / (double)lnx) : 0.0);
}

/**
 * @brief measure_ipi_latency() - Measure latency of IPI
 *        Repeatedly kick IPI to notify the remote and then wait for IPI kick
 *        from RPU and measure the latency. Similarly, measure the latency
 *        from RPU to APU. Each iteration, record this latency and after the
 *        loop has finished, report the total latency in nanoseconds.
 *        Notes:
 *        - RPU will repeatedly wait for IPI from APU until APU
 *          notifies remote demo has finished by setting the value in the
 *          shared memory.
 *
 * @param[in] ch - channel information, which contains the IPI i/o region,
 *                 shared memory i/o region and the ttc timer i/o region.
 * @return - 0 on success, error code if failure.
+ */ +static int measure_ipi_latency(struct channel_s *ch) +{ + struct metal_stat a2r = STAT_INIT; + struct metal_stat r2a = STAT_INIT; + uint64_t delta_ns; + int i; + + LPRINTF("Starting IPI latency task\n"); + ttc_vs_clock_gettime(ch); + /* write to shared memory to indicate demo has started */ + metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START); + + delta_ns = metal_get_timestamp(); + for ( i = 1; i <= ITERATIONS; i++) { + /* Reset TTC counter */ + reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + /* Kick IPI to notify the remote */ + kick_ipi(NULL); + /* irq handler stops timer for rpu->apu irq */ + wait_for_notified(&ch->remote_nkicked); + + update_stat(&a2r, read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU)); + update_stat(&r2a, read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU)); + } + delta_ns = metal_get_timestamp() - delta_ns; + + /* write to shared memory to indicate demo has finished */ + metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, 0); + /* Kick IPI to notify the remote */ + kick_ipi(NULL); + + /* report avg latencies */ + LPRINTF("IPI latency: %i iterations took %lu ns (CLOCK_MONOTONIC)\n", + ITERATIONS, delta_ns); + LPRINTF("TTC [min,max] are in TTC ticks: %d ns per tick\n", + NS_PER_TTC_TICK); + LPRINTF("APU to RPU: [%lu, %lu] avg: %lu ns\n", + a2r.st_min, a2r.st_max, + a2r.st_sum * NS_PER_TTC_TICK / ITERATIONS); + LPRINTF("RPU to APU: [%lu, %lu] avg: %lu ns\n", + r2a.st_min, r2a.st_max, + r2a.st_sum * NS_PER_TTC_TICK / ITERATIONS); + LPRINTF("Finished IPI latency task\n"); + return 0; +} + +int ipi_latency_demo() +{ + struct metal_device *dev; + struct metal_io_region *io; + struct channel_s ch; + int ret = 0; + + print_demo("IPI latency"); + memset(&ch, 0, sizeof(ch)); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Get shared memory device IO region */ + io = metal_device_io_region(dev, 0); + if (!io) 
{ + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + goto out; + } + ch.shm_dev = dev; + ch.shm_io = io; + + /* Open TTC device */ + ret = metal_device_open(BUS_NAME, TTC_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", TTC_DEV_NAME); + goto out; + } + + /* Get TTC IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + goto out; + } + ch.ttc_dev = dev; + ch.ttc_io = io; + + /* initialize remote_nkicked */ + ch.remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&ch.remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, &ch); + enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = measure_ipi_latency(&ch); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (ch.ttc_dev) + metal_device_close(ch.ttc_dev); + if (ch.shm_dev) + metal_device_close(ch.shm_dev); + return ret; + +} + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c new file mode 100644 index 00000000..c5ef173f --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/ipi_shmem_demo.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * ipi_shmem_demo.c - shared memory with IPI demo + * This demo will: + * 1. Open the shared memory device. + * 2. Open the IPI device. + * 3. Register IPI interrupt handler. + * 4. Write message to the shared memory. + * 5. Kick IPI to notify there is a message written to the shared memory + * 6. Wait until the remote has kicked the IPI to notify the remote + * has echoed back the message. + * 7. 
Read the message from shared memory. + * 8. Verify the message + * 9. Repeat step 4 to 8 for 100 times. + * 10. Clean up: deregister the IPI interrupt handler, close the IPI device + * , close the shared memory device. + * + * Here is the Shared memory structure of this demo: + * |0x0 - 0x03 | number of APU to RPU buffers available to RPU | + * |0x04 - 0x07 | number of APU to RPU buffers consumed by RPU | + * |0x08 - 0x1FFC | address array for shared buffers from APU to RPU | + * |0x2000 - 0x2003 | number of RPU to APU buffers available to APU | + * |0x2004 - 0x2007 | number of RPU to APU buffers consumed by APU | + * |0x2008 - 0x3FFC | address array for shared buffers from RPU to APU | + * |0x04000 - 0x103FFC | APU to RPU buffers | + * |0x104000 - 0x203FFC | RPU to APU buffers | + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* Shared memory offsets */ +#define SHM_DESC_OFFSET_TX 0x0 +#define SHM_BUFF_OFFSET_TX 0x04000 +#define SHM_DESC_OFFSET_RX 0x02000 +#define SHM_BUFF_OFFSET_RX 0x104000 + +/* Shared memory descriptors offset */ +#define SHM_DESC_AVAIL_OFFSET 0x00 +#define SHM_DESC_USED_OFFSET 0x04 +#define SHM_DESC_ADDR_ARRAY_OFFSET 0x08 + +#define PKGS_TOTAL 1024 + +#define BUF_SIZE_MAX 512 +#define SHUTDOWN "shutdown" + +#define NS_PER_S (1000 * 1000 * 1000) + +struct msg_hdr_s { + uint32_t index; + uint32_t len; +}; + +static atomic_flag remote_nkicked; /* is remote kicked, 0 - kicked, + 1 - not-kicked */ + +/** + * @brief get_timestamp() - Get the timestamp + * IT gets the timestamp and return nanoseconds. + * + * @return nano seconds. 
+ */ +static unsigned long long get_timestamp (void) +{ + unsigned long long t = 0; + struct timespec tp; + int r; + + r = clock_gettime(CLOCK_MONOTONIC, &tp); + if (r == -1) { + LPERROR("Bad clock_gettime!\n"); + return t; + } else { + t = tp.tv_sec * (NS_PER_S); + t += tp.tv_nsec; + } + return t; +} + +static int ipi_irq_handler (int vect_id, void *priv) +{ + (void)vect_id; + (void)priv; + + atomic_flag_clear(&remote_nkicked); + return METAL_IRQ_HANDLED; +} + +/** + * @brief ipi_shmem_echo() - shared memory IPI demo + * This task will: + * * Get the timestamp and put it into the ping shared memory + * * Update the shared memory descriptor for the new available + * ping buffer. + * * Trigger IPI to notifty the remote. + * * Repeat the above steps until it sends out all the packages. + * * Monitor IPI interrupt, verify every received package. + * * After all the packages are received, it sends out shutdown + * message to the remote. + * + * @param[in] shm_io - shared memory metal i/o region + * @return - return 0 on success, otherwise return error number indicating + * type of error. 
+ */ +static int ipi_shmem_echo(struct metal_io_region *shm_io) +{ + int ret; + uint32_t i; + uint32_t rx_avail; + unsigned long tx_avail_offset, rx_avail_offset; + unsigned long rx_used_offset; + unsigned long tx_addr_offset, rx_addr_offset; + unsigned long tx_data_offset, rx_data_offset; + unsigned long long tstart, tend; + long long tdiff; + long long tdiff_avg_s = 0, tdiff_avg_ns = 0; + void *txbuf = NULL, *rxbuf = NULL, *tmpptr; + struct msg_hdr_s *msg_hdr; + uint32_t tx_phy_addr_32; + + txbuf = metal_allocate_memory(BUF_SIZE_MAX); + if (!txbuf) { + LPERROR("Failed to allocate local tx buffer for msg.\n"); + ret = -ENOMEM; + goto out; + } + rxbuf = metal_allocate_memory(BUF_SIZE_MAX); + if (!rxbuf) { + LPERROR("Failed to allocate local rx buffer for msg.\n"); + ret = -ENOMEM; + goto out; + } + + /* Clear shared memory */ + metal_io_block_set(shm_io, 0, 0, metal_io_region_size(shm_io)); + + /* Set tx/rx buffer address offset */ + tx_avail_offset = SHM_DESC_OFFSET_TX + SHM_DESC_AVAIL_OFFSET; + rx_avail_offset = SHM_DESC_OFFSET_RX + SHM_DESC_AVAIL_OFFSET; + rx_used_offset = SHM_DESC_OFFSET_RX + SHM_DESC_USED_OFFSET; + tx_addr_offset = SHM_DESC_OFFSET_TX + SHM_DESC_ADDR_ARRAY_OFFSET; + rx_addr_offset = SHM_DESC_OFFSET_RX + SHM_DESC_ADDR_ARRAY_OFFSET; + tx_data_offset = SHM_DESC_OFFSET_TX + SHM_BUFF_OFFSET_TX; + rx_data_offset = SHM_DESC_OFFSET_RX + SHM_BUFF_OFFSET_RX; + + LPRINTF("Start echo flood testing....\n"); + LPRINTF("Sending msgs to the remote.\n"); + + for (i = 0; i < PKGS_TOTAL; i++) { + + /* Construct a message to send */ + tmpptr = txbuf; + msg_hdr = tmpptr; + msg_hdr->index = i; + msg_hdr->len = sizeof(tstart); + tmpptr += sizeof(struct msg_hdr_s); + tstart = get_timestamp(); + *(unsigned long long *)tmpptr = tstart; + + /* copy message to shared buffer */ + metal_io_block_write(shm_io, tx_data_offset, msg_hdr, + sizeof(struct msg_hdr_s) + msg_hdr->len); + + /* Write to the address array to tell the other end + * the buffer address. 
+ */ + tx_phy_addr_32 = (uint32_t)metal_io_phys(shm_io, + tx_data_offset); + metal_io_write32(shm_io, tx_addr_offset, tx_phy_addr_32); + tx_data_offset += sizeof(struct msg_hdr_s) + msg_hdr->len; + tx_addr_offset += sizeof(uint32_t); + + /* Increase number of available buffers */ + metal_io_write32(shm_io, tx_avail_offset, (i + 1)); + /* Kick IPI to notify data has been put to shared buffer */ + kick_ipi(NULL); + } + + LPRINTF("Waiting for messages to echo back and verify.\n"); + i = 0; + tx_data_offset = SHM_DESC_OFFSET_TX + SHM_BUFF_OFFSET_TX; + while (i != PKGS_TOTAL) { + wait_for_notified(&remote_nkicked); + rx_avail = metal_io_read32(shm_io, rx_avail_offset); + while (i != rx_avail) { + uint32_t rx_phy_addr_32; + + /* Received pong from the other side */ + + /* Get the buffer location from the shared memory + * rx address array. + */ + rx_phy_addr_32 = metal_io_read32(shm_io, + rx_addr_offset); + rx_data_offset = metal_io_phys_to_offset(shm_io, + (metal_phys_addr_t)rx_phy_addr_32); + if (rx_data_offset == METAL_BAD_OFFSET) { + LPERROR("failed to get rx [%d] offset: 0x%x.\n", + i, rx_phy_addr_32); + ret = -EINVAL; + goto out; + } + rx_addr_offset += sizeof(rx_phy_addr_32); + + /* Read message header from shared memory */ + metal_io_block_read(shm_io, rx_data_offset, rxbuf, + sizeof(struct msg_hdr_s)); + msg_hdr = (struct msg_hdr_s *)rxbuf; + + /* Check if the message header is valid */ + if (msg_hdr->index != (uint32_t)i) { + LPERROR("wrong msg: expected: %d, actual: %d\n", + i, msg_hdr->index); + ret = -EINVAL; + goto out; + } + if (msg_hdr->len != sizeof(tstart)) { + LPERROR("wrong msg: length invalid: %lu, %u.\n", + sizeof(tstart), msg_hdr->len); + ret = -EINVAL; + goto out; + } + /* Read message */ + rx_data_offset += sizeof(*msg_hdr); + metal_io_block_read(shm_io, + rx_data_offset, + rxbuf + sizeof(*msg_hdr), msg_hdr->len); + rx_data_offset += msg_hdr->len; + /* increase rx used count to indicate it has consumed + * the received data */ + 
metal_io_write32(shm_io, rx_used_offset, (i + 1)); + + /* Verify message */ + /* Get tx message previously sent*/ + metal_io_block_read(shm_io, tx_data_offset, txbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + tx_data_offset += sizeof(*msg_hdr) + sizeof(tstart); + /* Compare the received message and the sent message */ + ret = memcmp(rxbuf, txbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + if (ret) { + LPERROR("data[%u] verification failed.\n", i); + LPRINTF("Expected:"); + dump_buffer(txbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + LPRINTF("Actual:"); + dump_buffer(rxbuf, + sizeof(*msg_hdr) + sizeof(tstart)); + ret = -EINVAL; + goto out; + } + + i++; + } + } + tend = get_timestamp(); + tdiff = tend - tstart; + + /* Send shutdown message */ + tmpptr = txbuf; + msg_hdr = tmpptr; + msg_hdr->index = i; + msg_hdr->len = strlen(SHUTDOWN); + tmpptr += sizeof(struct msg_hdr_s); + sprintf(tmpptr, SHUTDOWN); + /* copy message to shared buffer */ + metal_io_block_write(shm_io, + tx_data_offset, + msg_hdr, + sizeof(struct msg_hdr_s) + msg_hdr->len); + + tx_phy_addr_32 = (uint32_t)metal_io_phys(shm_io, + tx_data_offset); + metal_io_write32(shm_io, tx_addr_offset, tx_phy_addr_32); + metal_io_write32(shm_io, tx_avail_offset, PKGS_TOTAL + 1); + LPRINTF("Kick remote to notify shutdown message sent...\n"); + kick_ipi(NULL); + + tdiff /= PKGS_TOTAL; + tdiff_avg_s = tdiff / NS_PER_S; + tdiff_avg_ns = tdiff % NS_PER_S; + LPRINTF("Total packages: %d, time_avg = %lds, %ldns\n", + i, (long int)tdiff_avg_s, (long int)tdiff_avg_ns); + + ret = 0; +out: + if (txbuf) + metal_free_memory(txbuf); + if (rxbuf) + metal_free_memory(rxbuf); + return ret; +} + +int ipi_shmem_demo() +{ + struct metal_device *shm_dev = NULL; + struct metal_io_region *shm_io = NULL; + int ret = 0; + + print_demo("IPI and shared memory"); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &shm_dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* 
Get shared memory device IO region */ + shm_io = metal_device_io_region(shm_dev, 0); + if (!shm_io) { + LPERROR("Failed to map io region for %s.\n", shm_dev->name); + ret = -ENODEV; + goto out; + } + + /* initialize remote_nkicked */ + remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, NULL); + enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = ipi_shmem_echo(shm_io); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (shm_dev) + metal_device_close(shm_dev); + return ret; + +} + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c new file mode 100644 index 00000000..d0f9aefd --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demo.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + + /***************************************************************************** + * libmetal_amp_demo.c + * + * This application shows how to use IPI to trigger interrupt and how to + * setup shared memory with libmetal API for communication between processors. + * + * This app does the following: + * 1. Run the shared memory echo demo task ipi_shmem_task() + * * Write message to the APU to RPU shared buffer. + * * Update the APU to RPU shared memory available index. + * * Trigger IPI to the remote. + * * Repeat the above 3 sub steps until it sends all the packages. + * * Wait for IPI to receive all the packages + * * If "shutdown" message is received, cleanup the libmetal source. + * 2. Run shared memory demo with shmem_task(). + * * Open shared memory device. + * * For 1000 times, communicate between local and remote processes + * using shared memory and polling via shared memory. 
+ * * Cleanup shared memory device. + * 3. Run the atomic demo task atomic_shmem_task(): + * * Trigger the IPI to the remote, the remote will then start doing atomic + * add calculation. + * * Start atomic add by 1 for 1000 times to the first 32bit of the shared + * memory descriptor location. + * * Once it receives the IPI interrupt, it will check if the value stored + * in the shared memory descriptor location is 2000. If yes, the atomic + * across the shared memory passed, otherwise, it failed. + * 4. Demonstrate IPI latency with ipi_latency_demo_task() + * * Open IPI and timer devices. + * * For 1000 times, record APU to RPU IPI latency and RPU to APU + * latency. Then report average time for each direction. + * * Cleanup libmetal resources + * 5. Demonstrate shared memory latency with shmem_latency_demo_task() + * * Open shared memory and timer devices. + * * For 1000 times, record APU to RPU shared memory latency and RPU to APU + * latency for 8 bytes, 1/2K and 1K. Then report average time for each + * direction. + * * Cleanup libmetal resources + * 6. Demonstrate shared memory throughput with shmem_throughput_demo_task() + * * Open shared memory, IPI and timer devices. + * * For 1000 times, record APU block read and write times. Notify remote + * to run test, then similarly record RPU block read and write times for + * 1/2KB, 1KB and 2KB. Then report average throughput for each data size + * and operation. + * * Cleanup libmetal resources + */ + +#include +#include +#include +#include +#include "common.h" +#include "sys_init.h" + +/** + * @brief main function of the demo application. + * Here are the steps for the main function: + * * initialize libmetal environment + * * Run the IPI with shared memory demo. + * * Run the shared memory demo. + * * Run the atomic across shared memory demo. + * * Run the ipi latency demo. + * * Run the shared memory latency demo. + * * Run the shared memory throughput demo. 
+ * * Cleanup libmetal environment + * Report if any of the above tasks failed. + * @return 0 - succeeded, non-zero for failures. + */ +int main(int ac, char **av) +{ + int ret, opt; + + ret = sys_init(); + if (ret) { + LPERROR("Failed to initialize system.\n"); + return ret; + } + + while ((opt = getopt(ac, av, "d")) != -1) { + if (opt == 'd') + metal_set_log_level(METAL_LOG_DEBUG); + } + + ret = shmem_demo(); + if (ret) { + LPERROR("shared memory demo failed.\n"); + goto out; + } + + /* sleep, to give time for RPU to clean up the last demo. */ + sleep(1); + ret = atomic_shmem_demo(); + if (ret) { + LPERROR("shared memory atomic demo failed.\n"); + goto out; + } + + /* sleep, to give time for RPU to clean up the last demo. */ + sleep(1); + ret = ipi_shmem_demo(); + if (ret) { + LPERROR("IPI and shared memory demo failed.\n"); + goto out; + } + + /* sleep, to give time for RPU to clean up the last demo. */ + sleep(1); + ret = ipi_latency_demo(); + if (ret) { + LPERROR("IPI latency demo failed.\n"); + goto out; + } + + /* sleep, to give time for RPU to clean up the last demo. */ + sleep(1); + ret = shmem_latency_demo(); + if (ret) { + LPERROR("shared memory latency demo failed.\n"); + goto out; + } + + /* sleep, to give time for RPU to clean up the last demo. */ + sleep(1); + ret = shmem_throughput_demo(); + if (ret) { + LPERROR("shared memory throughput demo failed.\n"); + goto out; + } + +out: + sys_cleanup(); + + return ret; +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c new file mode 100644 index 00000000..0eade5c2 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/libmetal_amp_demod.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2016, Xilinx Inc. and Contributors. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sys_init.h" + +#define IPI_TRIG_OFFSET 0x0 +#define IPI_OBS_OFFSET 0x4 +#define IPI_ISR_OFFSET 0x10 +#define IPI_IMR_OFFSET 0x14 +#define IPI_IER_OFFSET 0x18 +#define IPI_IDR_OFFSET 0x1C + +#ifndef IPI_MASK +#define IPI_MASK 0x1000000 + +#endif /* IPI_MASK */ +#ifndef IPI_DEV_NAME +#define IPI_DEV_NAME "ff310000.ipi" +#endif /* IPI_DEV_NAME */ +#define SHM0_DESC_DEV_NAME "3ed00000.shm_desc" +#define SHM1_DESC_DEV_NAME "3ed10000.shm_desc" +#define SHM_DEV_NAME "3ed20000.shm" +#define BUS_NAME "platform" +#define D0_SHM_OFFSET 0x00000 +#define D1_SHM_OFFSET 0x20000 + +#define BUF_SIZE_MAX 512 +#define SHUTDOWN "shutdown" + +#define LPRINTF(format, ...) \ + printf("SERVER> " format, ##__VA_ARGS__) + +struct shm_mg_s { + uint32_t avails; + uint32_t used; +}; + +typedef uint64_t shm_addr_t; + +struct msg_hdr_s { + uint32_t index; + int32_t len; +}; + +struct channel_s { + struct metal_device *ipi_dev; + struct metal_io_region *ipi_io; + unsigned int ipi_mask; + struct metal_device *shm0_desc_dev; + struct metal_io_region *shm0_desc_io; + struct metal_device *shm1_desc_dev; + struct metal_io_region *shm1_desc_io; + struct metal_device *shm_dev; + struct metal_io_region *shm_io; + atomic_int notified; + unsigned long d0_start_offset; + unsigned long d1_start_offset; +}; + +static struct channel_s ch0; + +extern int system_init(); +extern int run_comm_task(void *task, void *arg); +extern void wait_for_interrupt(void); + +static int ipi_irq_isr (int vect_id, void *priv) +{ + (void)vect_id; + struct channel_s *ch = (struct channel_s *)priv; + uint64_t val = 1; + + if (!ch) + return METAL_IRQ_NOT_HANDLED; + val = metal_io_read32(ch->ipi_io, IPI_ISR_OFFSET); + if (val & ch->ipi_mask) { + metal_io_write32(ch->ipi_io, IPI_ISR_OFFSET, ch->ipi_mask); + atomic_flag_clear(&ch->notified); + return 
METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +static int ipi_task_shm_atomicd(void *arg) +{ + struct channel_s *ch = (struct channel_s *)arg; + atomic_int *shm_int; + unsigned int flags; + int i; + + shm_int = (atomic_int *)metal_io_virt(ch->shm0_desc_io, 0); + + LPRINTF("Wait for atomic test to start.\n"); + while (1) { + do { + flags = metal_irq_save_disable(); + if (!atomic_flag_test_and_set(&ch->notified)) { + metal_irq_restore_enable(flags); + break; + } + wait_for_interrupt(); + metal_irq_restore_enable(flags); + } while(1); + for (i = 0; i < 1000; i++) + atomic_fetch_add(shm_int, 1); + //*((unsigned int volatile *)shm_int) += 1; + /* memory barrier */ + atomic_thread_fence(memory_order_acq_rel); + + /* Send the message */ + LPRINTF("SENDING message...\n"); + metal_io_write32(ch->ipi_io, IPI_TRIG_OFFSET, ch->ipi_mask); + break; + } + + return 0; +} + +static int ipi_task_echod(void *arg) +{ + struct channel_s *ch = (struct channel_s *)arg; + struct shm_mg_s *shm0_mg, *shm1_mg; + shm_addr_t *shm0_addr_array, *shm1_addr_array; + struct msg_hdr_s *msg_hdr; + unsigned int flags; + void *d0, *d1, *lbuf; + metal_phys_addr_t d0_pa; + int len; + + shm0_mg = (struct shm_mg_s *)metal_io_virt(ch->shm0_desc_io, 0); + shm1_mg = (struct shm_mg_s *)metal_io_virt(ch->shm1_desc_io, 0); + shm0_addr_array = (void *)shm0_mg + sizeof(struct shm_mg_s); + shm1_addr_array = (void *)shm1_mg + sizeof(struct shm_mg_s); + d1 = metal_io_virt(ch->shm_io, ch->d1_start_offset); + lbuf = malloc(BUF_SIZE_MAX); + if (!lbuf) { + LPRINTF("ERROR: Failed to allocate local buffer for msg.\n"); + return -1; + } + + LPRINTF("Wait for echo test to start.\n"); + while (1) { + do { + flags = metal_irq_save_disable(); + if (!atomic_flag_test_and_set(&ch->notified)) { + metal_irq_restore_enable(flags); + break; + } + wait_for_interrupt(); + metal_irq_restore_enable(flags); + } while(1); + atomic_thread_fence(memory_order_acq_rel); + while(shm0_mg->used != shm0_mg->avails) { + d0_pa = 
(metal_phys_addr_t)shm0_addr_array[shm0_mg->used]; + d0 = metal_io_phys_to_virt(ch->shm_io, d0_pa); + if (!d0) { + LPRINTF("ERROR: failed to get rx addr:0x%lx.\n", + d0_pa); + goto out; + } + /* Copy msg header from shared buf to local mem */ + len = metal_io_block_read(ch->shm_io, + metal_io_virt_to_offset(ch->shm_io, d0), + lbuf, sizeof(struct msg_hdr_s)); + if (len < (int)sizeof(struct msg_hdr_s)) { + LPRINTF("ERROR: failed to get msg header.\n"); + goto out; + } + msg_hdr = lbuf; + if (msg_hdr->len < 0) { + LPRINTF("ERROR: wrong msg length: %d.\n", + (int)msg_hdr->len); + goto out; + } else { + /* copy msg data from shared buf to local mem */ + d0 += sizeof(struct msg_hdr_s); + len = metal_io_block_read(ch->shm_io, + metal_io_virt_to_offset(ch->shm_io, d0), + lbuf + sizeof(struct msg_hdr_s), + msg_hdr->len); +#if DEBUG + LPRINTF("received: %d, %d\n", + (int)msg_hdr->index, (int)msg_hdr->len); +#endif + /* Check if the it is the shutdown message */ + if (!strncmp((lbuf + sizeof(struct msg_hdr_s)), + SHUTDOWN, sizeof(SHUTDOWN))) { + LPRINTF("Received shutdown message\n"); + goto out; + } + } + /* Copy the message back to the other end */ + metal_io_block_write(ch->shm_io, + metal_io_virt_to_offset(ch->shm_io, d1), + lbuf, + sizeof(struct msg_hdr_s) + msg_hdr->len); + + /* Update the d1 address */ + shm1_addr_array[shm1_mg->avails] = + (uint64_t)metal_io_virt_to_phys( + ch->shm_io, d1); + d1 += (sizeof(struct msg_hdr_s) + msg_hdr->len); + shm0_mg->used++; + shm1_mg->avails++; + /* memory barrier */ + atomic_thread_fence(memory_order_acq_rel); + + /* Send the message */ + metal_io_write32(ch->ipi_io, IPI_TRIG_OFFSET, + ch->ipi_mask); + } + } + +out: + free(lbuf); + return 0; +} + +int main(void) +{ + struct metal_device *device; + struct metal_io_region *io; + int irq; + uint32_t val; + int ret = 0; + + ret = sys_init(); + if (ret) { + LPRINTF("ERROR: Failed to initialize system\n"); + return -1; + } + memset(&ch0, 0, sizeof(ch0)); + + atomic_store(&ch0.notified, 
1); + + /* Open IPI device */ + ret = metal_device_open(BUS_NAME, IPI_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", IPI_DEV_NAME); + goto out; + } + + /* Map IPI device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + + /* Store the IPI device and I/O region */ + ch0.ipi_dev = device; + ch0.ipi_io = io; + + /* Open shared memory0 descriptor device */ + ret = metal_device_open(BUS_NAME, SHM0_DESC_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", + SHM0_DESC_DEV_NAME); + goto out; + } + + /* Map shared memory0 descriptor device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + /* Store the shared memory0 descriptor device and I/O region */ + ch0.shm0_desc_dev = device; + ch0.shm0_desc_io = io; + + /* Open shared memory1 descriptor device */ + ret = metal_device_open(BUS_NAME, SHM1_DESC_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", + SHM1_DESC_DEV_NAME); + goto out; + } + + /* Map shared memory1 descriptor device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + /* Store the shared memory0 descriptor device and I/O region */ + ch0.shm1_desc_dev = device; + ch0.shm1_desc_io = io; + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &device); + if (ret) { + LPRINTF("ERROR: Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Map shared memory device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPRINTF("ERROR: Failed to map io regio for %s.\n", + 
device->name); + metal_device_close(device); + ret = -ENODEV; + goto out; + } + + /* Store the shared memory device and I/O region */ + ch0.shm_dev = device; + ch0.shm_io = io; + ch0.d1_start_offset = D1_SHM_OFFSET; + + /* Get interrupt ID from IPI metal device */ + irq = (intptr_t)ch0.ipi_dev->irq_info; + if (irq < 0) { + LPRINTF("ERROR: Failed to request interrupt for %s.\n", + device->name); + ret = -EINVAL; + goto out; + } + + ch0.ipi_mask = IPI_MASK; + + LPRINTF("Try to register IPI interrupt.\n"); + ret = metal_irq_register(irq, ipi_irq_isr, &ch0); + LPRINTF("registered IPI interrupt.\n"); + if (ret) + goto out; + metal_irq_enable(irq); + + /* Enable interrupt */ + metal_io_write32(ch0.ipi_io, IPI_IER_OFFSET, ch0.ipi_mask); + val = metal_io_read32(ch0.ipi_io, IPI_IMR_OFFSET); + if (val & ch0.ipi_mask) { + LPRINTF("ERROR: Failed to enable IPI interrupt.\n"); + ret = -1; + goto out; + } + LPRINTF("enabled IPI interrupt.\n"); + ret = ipi_task_shm_atomicd((void *)&ch0); + if (ret) { + LPRINTF("ERROR: Failed to run shared memory atomic task.\n"); + goto out; + } + ret = ipi_task_echod((void*)&ch0); + if (ret) + LPRINTF("ERROR: Failed to run IPI communication task.\n"); + +out: + if (ch0.ipi_dev) + metal_device_close(ch0.ipi_dev); + if (ch0.shm0_desc_dev) + metal_device_close(ch0.shm0_desc_dev); + if (ch0.shm1_desc_dev) + metal_device_close(ch0.shm1_desc_dev); + if (ch0.shm_dev) + metal_device_close(ch0.shm_dev); + sys_cleanup(); + + return ret; +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c new file mode 100644 index 00000000..017affb9 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_atomic_demo.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + + /***************************************************************************** + * atomic_shmem_demod.c - Shared memory atomic operation demo + * This demo will: + * + * 1. Open the shared memory device. + * 2. Open the IPI device. + * 3. Register IPI interrupt handler. + * 4. Kick IPI to notify the other end to start the demo + * 5. Start atomic add by 1 for 5000 times over the shared memory + * 6. Wait for remote IPI kick to know when the remote has finished the demo. + * 7. Verify the result. As two sides both have done 5000 times of adding 1, + * check if the end result is 5000*2. + * 8. Clean up: deregister the IPI interrupt handler, close the IPI device + * , close the shared memory device. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define ATOMIC_INT_OFFSET 0x0 /* shared memory offset for atomic operation */ +#define ITERATIONS 5000 + +static atomic_flag remote_nkicked; /* is remote kicked, 0 - kicked, + 1 - not-kicked */ + +static int ipi_irq_handler (int vect_id, void *priv) +{ + (void)vect_id; + (void)priv; + atomic_flag_clear(&remote_nkicked); + return METAL_IRQ_HANDLED; +} + +/** + * @brief atomic_add_shmem() - Shared memory atomic operation demo + * This task will: + * * Write to shared memory to notify the remote to start atomic add on + * the shared memory for 1000 times. + * * Start atomic add by 1 for 5000 times to first 32 bits of memory in + * the shared memory which is pointed to by shm_io. + * * Wait for the remote to write to shared memory + * * Once it received the polling kick from the remote, it will check + * if the value stored in the shared memory is the same as the + * expected. + * * It will print if the atomic add test has passed or not. + * @param[in] shm_io - shared memory metal i/o region + * @return - If setup failed, return the corresponding error number. Otherwise + * return 0 on success. 
+ */ +static int atomic_add_shmem(struct metal_io_region *shm_io) +{ + int i, ret; + atomic_int *shm_int; + + LPRINTF("Starting atomic shared memory task.\n"); + + /* Initialize the shared memory on which we run the atomic add */ + shm_int = (atomic_int *)metal_io_virt(shm_io, + ATOMIC_INT_OFFSET); + atomic_store(shm_int, 0); + + /* Kick the remote to notify demo starts. */ + kick_ipi(NULL); + + /* Do atomic add over the shared memory */ + for (i = 0; i < ITERATIONS; i++) { + atomic_fetch_add(shm_int, 1); + } + + /* Wait for kick from RPU to know when RPU finishes the demo */ + wait_for_notified(&remote_nkicked); + + if (atomic_load(shm_int) == (ITERATIONS << 1 )) { + LPRINTF("shm atomic demo PASSED!\n"); + ret = 0; + } else { + LPRINTF("shm atomic demo FAILED. expected: %u, actual: %u\n", + (unsigned int)(ITERATIONS << 1), atomic_load(shm_int)); + ret = -1; + } + + return ret; +} + +int atomic_shmem_demo() +{ + struct metal_device *shm_dev = NULL; + struct metal_io_region *shm_io = NULL; + int ret = 0; + + print_demo("atomic operation over shared memory"); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &shm_dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Get shared memory device IO region */ + shm_io = metal_device_io_region(shm_dev, 0); + if (!shm_io) { + LPERROR("Failed to map io region for %s.\n", shm_dev->name); + ret = -ENODEV; + goto out; + } + + /* initialize remote_nkicked */ + remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, NULL); + enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = atomic_add_shmem(shm_io); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (shm_dev) + metal_device_close(shm_dev); + return ret; + +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c 
b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c new file mode 100644 index 00000000..bf576a79 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_demo.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + + /***************************************************************************** + * shmem_demo.c + * This demo demonstrates the use of shared mem. between the APU and RPU. + * This demo does so via the following steps: + * + * 1. Open the shared memory device. + * 2. Clear the demo control TX/RX available values in shared memory. + * 3. APU set demo control in shared memory to notify RPU demo has started + * 4. APU will write message to the shared memory. + * 5. APU will increase TX avail values in the shared memory to notify RPU + * there is a message ready to read. + * 6. APU will poll the RX avail value in th shared memory to see if RPU + * has echoed back the message into the shared memory. + * 7. When APU knows there is new RX message available, it will read the + * RX message from the shared memory. + * 8. APU will verify the message to see if it matches the one it has sent. + * 9. Close the shared memory device. + * + * Here is the Shared memory structure of this demo: + * |0 | 4Bytes | DEMO control status shows if demo starts or not | + * |0x04 | 4Bytes | number of APU to RPU buffers available to RPU | + * |0x08 | 4Bytes | number of APU to RPU buffers consumed by RPU | + * |0x0c | 4Bytes | number of RPU to APU buffers available to APU | + * |0x10 | 4Bytes | number of RPU to APU buffers consumed by APU | + * |0x14 | 1KBytes | APU to RPU buffer | + * ... ... 
+ * |0x800 | 1KBytes | RPU to APU buffer | + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* Shared memory offsets */ +#define SHM_DEMO_CNTRL_OFFSET 0x0 +#define SHM_TX_AVAIL_OFFSET 0x04 +#define SHM_RX_AVAIL_OFFSET 0x0C +#define SHM_TX_BUFFER_OFFSET 0x14 +#define SHM_RX_BUFFER_OFFSET 0x800 + +#define SHM_BUFFER_SIZE 0x400 + +#define DEMO_STATUS_IDLE 0x0 +#define DEMO_STATUS_START 0x1 /* Status value to indicate demo start */ + +#define TEST_MSG "Hello World - libmetal shared memory demo" + +struct msg_hdr_s { + uint32_t index; + uint32_t len; +}; + +/** + * @brief shmem_task() - Show use of shared memory with Libmetal. + * Write message to RPU. RPU will then read and echo + * back. Confirm if echoed message is identical. + * If messages differ, report error. + * + * Steps: + * 1. Clear demo control and TX/RX available values + * + * @param[in] shm_io - metal i/o region of the shared memory + * @return - return 0 on success, otherwise return error number indicating + * type of error + */ +int shmem_echo(struct metal_io_region *shm_io) +{ + void *tx_data = NULL; + void *rx_data = NULL; + unsigned int tx_count = 0; + unsigned int rx_count = 0; + struct msg_hdr_s *msg_hdr; + unsigned int data_len; + int ret; + + LPRINTF("Setting up shared memory demo.\n"); + /* clear demo status value */ + metal_io_write32(shm_io, SHM_DEMO_CNTRL_OFFSET, 0); + /* Clear TX/RX avail */ + metal_io_write32(shm_io, SHM_TX_AVAIL_OFFSET, 0); + metal_io_write32(shm_io, SHM_RX_AVAIL_OFFSET, 0); + + LPRINTF("Starting shared memory demo.\n"); + /* Notify the remote the demo starts */ + metal_io_write32(shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START); + + /* preparing data to send */ + data_len = sizeof(struct msg_hdr_s) + strlen(TEST_MSG) + 1; + tx_data = metal_allocate_memory(data_len); + if (!tx_data) { + LPERROR("Failed to allocate memory.\n"); + ret = -1; + goto out; + } + msg_hdr = (struct msg_hdr_s *)tx_data; + msg_hdr->index 
= tx_count; + msg_hdr->len = strlen(TEST_MSG) + 1; + sprintf(tx_data + sizeof(*msg_hdr), TEST_MSG); + LPRINTF("Sending message: %s\n", + (char *)(tx_data + sizeof(*msg_hdr))); + + /* write data to the shared memory*/ + ret = metal_io_block_write(shm_io, SHM_TX_BUFFER_OFFSET, + tx_data, data_len); + if (ret < 0){ + LPERROR("Unable to metal_io_block_write()\n"); + goto out; + } + /* Increase number of buffers available to notify the remote */ + tx_count++; + metal_io_write32(shm_io, SHM_TX_AVAIL_OFFSET, tx_count); + + /* wait for remote to echo back the data */ + while (metal_io_read32(shm_io, SHM_RX_AVAIL_OFFSET) == rx_count); + rx_count++; + /* New RX data is available, allocate buffer to received data */ + rx_data = metal_allocate_memory(data_len); + if (!rx_data) { + LPERROR("Failed to allocate memory\n"); + ret = -1; + goto out; + } + /* read data from the shared memory; keep the result so the check below sees it */ + ret = metal_io_block_read(shm_io, SHM_RX_BUFFER_OFFSET, + rx_data, data_len); + if (ret < 0){ + LPERROR("Unable to metal_io_block_read()\n"); + goto out; + } + /* verify the received data */ + ret = memcmp(tx_data, rx_data, data_len); + if (ret) { + LPERROR("Received data verification failed.\n"); + LPRINTF("Expected:"); + dump_buffer(tx_data, data_len); + LPRINTF("Actual:"); + dump_buffer(rx_data, data_len); + } else { + LPRINTF("Message Received: %s\n", + (char *)(rx_data + sizeof(*msg_hdr))); + } + /* Notify the remote the demo has finished. */ + metal_io_write32(shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_IDLE); + +out: + if (tx_data) + metal_free_memory(tx_data); + if (rx_data) + metal_free_memory(rx_data); + LPRINTF("Shared memory demo: %s.\n", ret ?
"Failed": "Passed" ); + return ret; +} + +int shmem_demo() +{ + struct metal_device *device = NULL; + struct metal_io_region *io = NULL; + int ret = 0; + + print_demo("shared memory"); + + /* Open the shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &device); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + return ret; + } + + /* get shared memory device IO region */ + io = metal_device_io_region(device, 0); + if (!io) { + LPERROR("Failed to get io region for %s.\n", device->name); + ret = -ENODEV; + goto out; + } + + /* Run the demo */ + ret = shmem_echo(io); + +out: + if (device) + metal_device_close(device); + return ret; +} diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c new file mode 100644 index 00000000..8374f257 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_latency_demo.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * shmem_latency_demo.c + * This demo demonstrates the shared mem. latency between the APU and RPU. + * This demo does so via the following steps: + * + * 1. Get the shared memory device I/O region. + * 1. Get the TTC timer device I/O region. + * 2. Get the IPI device I/O region. + * 3. Register IPI interrupt handler. + * 4. Write to shared memory to indicate demo starts + * 5. Reset the APU to RPU TTC counter, write data to the shared memory, then + * kick IPI to notify the remote. + * 6. When it receives IPI interrupt, the IPI interrupt handler marks the + * remote has kicked. + * 7. Accumulate APU to RPU and RPU to APU counter values. + * 8. Repeat step 5, 6 and 7 for 1000 times + * 9. 
Write shared memory to indicate RPU about demo finishes and kick + * IPI to notify. + * 10. Clean up: disable IPI interrupt, deregister the IPI interrupt handler. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */ +#define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */ + +#define TTC_CLK_FREQ_HZ 100000000 +#define NS_PER_SEC 1000000000 +#define NS_PER_TTC_TICK (NS_PER_SEC / TTC_CLK_FREQ_HZ) + +/* Shared memory offset */ +#define SHM_DEMO_CNTRL_OFFSET 0x0 /* Shared memory for the demo status */ +#define SHM_BUFF_OFFSET_TX 0x1000 /* Shared memory TX buffer start offset */ +#define SHM_BUFF_OFFSET_RX 0x2000 /* Shared memory RX buffer start offset */ + +#define DEMO_STATUS_IDLE 0x0 +#define DEMO_STATUS_START 0x1 /* Status value to indicate demo start */ + +#define ITERATIONS 1000 + +#define BUF_SIZE_MAX 4096 +#define PKG_SIZE_MIN 16 +#define PKG_SIZE_MAX 1024 + +struct channel_s { + struct metal_device *shm_dev; /* Shared memory metal device */ + struct metal_io_region *shm_io; /* Shared memory metal i/o region */ + struct metal_device *ttc_dev; /* TTC metal device */ + struct metal_io_region *ttc_io; /* TTC metal i/o region */ + atomic_flag remote_nkicked; /* 0 - kicked from remote */ +}; + +struct msg_hdr_s { + uint32_t index; + uint32_t len; +}; + +/** + * @brief read_timer() - return TTC counter value + * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter ID + */ +static inline uint32_t read_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + unsigned long offset = XTTCPS_CNT_VAL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + return metal_io_read32(ttc_io, offset); +} + +/** + * @brief reset_timer() - function to reset TTC counter + * Set the RST bit in the Count Control Reg. 
+ * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter id + */ +static inline void reset_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + uint32_t val; + unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + val = XTTCPS_CNT_CNTRL_RST_MASK; + metal_io_write32(ttc_io, offset, val); +} + +/** + * @brief stop_timer() - function to stop TTC counter + * Set the disable bit in the Count Control Reg. + * + * @param[in] ttc_io - TTC timer i/o region + * @param[in] cnt_id - counter id + */ +static inline void stop_timer(struct metal_io_region *ttc_io, + unsigned long cnt_id) +{ + uint32_t val; + unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET + + XTTCPS_CNT_OFFSET(cnt_id); + + val = XTTCPS_CNT_CNTRL_DIS_MASK; + metal_io_write32(ttc_io, offset, val); +} + +/** + * @brief ipi_irq_handler() - IPI interrupt handler + * It clears the channel's remote_nkicked flag to mark that an IPI + * interrupt has been received. It does not access the TTC timers + * and does not inspect vect_id. + * + * @param[in] vect_id - IPI interrupt vector ID (unused) + * @param[in/out] priv - communication channel data for this application. + * + * @return - METAL_IRQ_HANDLED when priv is a non-NULL channel pointer and + * its flag has been cleared; METAL_IRQ_NOT_HANDLED when priv + * is NULL. + * + */ +static int ipi_irq_handler (int vect_id, void *priv) +{ + struct channel_s *ch = (struct channel_s *)priv; + + (void)vect_id; + + if (ch) { + atomic_flag_clear(&ch->remote_nkicked); + return METAL_IRQ_HANDLED; + } + return METAL_IRQ_NOT_HANDLED; +} + +/** + * @brief measure_shmem_latency() - Measure latency of using shared memory + * and IPI with libmetal. + * Repeatedly send a message to RPU and then detect IPI from RPU + * and measure the latency. Similarly, measure the latency from RPU + * to APU. Each iteration, record this latency and after the loop + * has finished, report the average latency in nanoseconds.
+ * Notes: + * - RPU will repeatedly wait for IPI from APU until APU + * notifies remote demo has finished by setting the value in the + * shared memory. + * + * @param[in] ch - channel information, which contains the IPI i/o region, + * shared memory i/o region and the ttc timer i/o region. + * @return - 0 on success, error code if failure. + */ +static int measure_shmem_latency(struct channel_s *ch) +{ + size_t s; + struct msg_hdr_s *msg_hdr; + void *lbuf; + int ret, i; + + LPRINTF("Starting shared memory latency task\n\t" + "TTC [min,max] are in TTC ticks: %d ns per tick\n", + NS_PER_TTC_TICK); + /* allocate memory for receiving data */ + lbuf = metal_allocate_memory(BUF_SIZE_MAX); + if (!lbuf) { + LPERROR("Failed to allocate memory.\r\n"); + return -1; + } + memset(lbuf, 0xA, BUF_SIZE_MAX); + + /* write to shared memory to indicate demo has started */ + metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, DEMO_STATUS_START); + + for (s = PKG_SIZE_MIN; s <= PKG_SIZE_MAX; s <<= 1) { + struct metal_stat a2r = STAT_INIT; + struct metal_stat r2a = STAT_INIT; + for (i = 1; i <= ITERATIONS; i++) { + /* Reset TTC counter */ + reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU); + /* prepare data */ + msg_hdr = lbuf; + msg_hdr->index = i; + msg_hdr->len = s - sizeof(*msg_hdr); + /* Copy data to the shared memory */ + ret = metal_io_block_write(ch->shm_io, + SHM_BUFF_OFFSET_TX, lbuf, s); + if ((size_t)ret != s) { + LPERROR("Write shm failure: %lu,%lu\n", + s, (size_t)ret); + ret = -1; + goto out; + } + /* Kick IPI to notify the remote */ + kick_ipi(NULL); + /* irq handler stops timer for rpu->apu irq */ + wait_for_notified(&ch->remote_nkicked); + /* Read message */ + metal_io_block_read(ch->shm_io, + SHM_BUFF_OFFSET_RX, + lbuf, s); + msg_hdr = lbuf; + if (msg_hdr->len != (s - sizeof(*msg_hdr))) { + LPERROR("Read shm failure: %lu,%lu\n", + s, msg_hdr->len + sizeof(*msg_hdr)); + ret = -1; + goto out; + } + /* Stop RPU to APU TTC counter */ + stop_timer(ch->ttc_io, 
TTC_CNT_RPU_TO_APU); + + update_stat(&a2r, read_timer(ch->ttc_io, + TTC_CNT_APU_TO_RPU)); + update_stat(&r2a, read_timer(ch->ttc_io, + TTC_CNT_RPU_TO_APU)); + } + + /* report avg latencies */ + LPRINTF("package size %lu latency:\n", s); + LPRINTF(" APU to RPU: [%lu, %lu] avg: %lu ns\n", + a2r.st_min, a2r.st_max, + a2r.st_sum * NS_PER_TTC_TICK / ITERATIONS); + LPRINTF(" RPU to APU: [%lu, %lu] avg: %lu ns\n", + r2a.st_min, r2a.st_max, + r2a.st_sum * NS_PER_TTC_TICK / ITERATIONS); + } + + /* write to shared memory to indicate demo has finished */ + metal_io_write32(ch->shm_io, SHM_DEMO_CNTRL_OFFSET, 0); + /* Kick IPI to notify the remote */ + kick_ipi(NULL); + + LPRINTF("Finished shared memory latency task\n"); + ret = 0; /* success: discard the byte count left by the last block write */ +out: + metal_free_memory(lbuf); + return ret; /* -1 when a shared memory transfer failed, 0 otherwise */ +} + +int shmem_latency_demo() +{ + struct metal_device *dev; + struct metal_io_region *io; + struct channel_s ch; + int ret = 0; + + print_demo("shared memory latency"); + memset(&ch, 0, sizeof(ch)); + + /* Open shared memory device */ + ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", SHM_DEV_NAME); + goto out; + } + + /* Get shared memory device IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + goto out; + } + ch.shm_dev = dev; + ch.shm_io = io; + + /* Open TTC device */ + ret = metal_device_open(BUS_NAME, TTC_DEV_NAME, &dev); + if (ret) { + LPERROR("Failed to open device %s.\n", TTC_DEV_NAME); + goto out; + } + + /* Get TTC IO region */ + io = metal_device_io_region(dev, 0); + if (!io) { + LPERROR("Failed to map io region for %s.\n", dev->name); + ret = -ENODEV; + goto out; + } + ch.ttc_dev = dev; + ch.ttc_io = io; + + /* initialize remote_nkicked */ + ch.remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT; + atomic_flag_test_and_set(&ch.remote_nkicked); + + ret = init_ipi(); + if (ret) { + goto out; + } + ipi_kick_register_handler(ipi_irq_handler, &ch); +
enable_ipi_kick(); + + /* Run atomic operation demo */ + ret = measure_shmem_latency(&ch); + + /* disable IPI interrupt */ + disable_ipi_kick(); + deinit_ipi(); + +out: + if (ch.ttc_dev) + metal_device_close(ch.ttc_dev); + if (ch.shm_dev) + metal_device_close(ch.shm_dev); + return ret; + +} + diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c new file mode 100644 index 00000000..073fe379 --- /dev/null +++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/shmem_throughput_demo.c @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2017, Xilinx Inc. and Contributors. All rights reserved. + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/***************************************************************************** + * shmem_throughput_demo_task.c + * This is the remote side of the shared memory throughput demo. + * This demo does the following steps: + * + * 1. Get the shared memory device libmetal I/O region. + * 1. Get the TTC timer device libemtal I/O region. + * 2. Get IPI device libmetal I/O region and the IPI interrupt vector. + * 3. Register IPI interrupt handler. + * 6. Upload throughput measurement: + * Start TTC APU counter, write data to shared memory and kick IPI to + * notify remote. It will iterate for 1000 times, stop TTC APU counter. + * Wait for RPU IPI kick to know RPU has finished receiving packages + * and RPU TX counter is ready to read. Read the APU TX and RPU RX + * counter values and save them. Repeat for different package sizes. + * After this measurement, kick IPI to notify the remote, the + * measurement has finished. + * 7. Download throughput measurement: + * Start TTC APU counter, wait for IPI kick, check if data is available, + * if yes, read as much data as possible from shared memory. It will + * iterates until 1000 packages have been received, stop TTC APU counter. 
+ *    Wait for RPU IPI kick so that APU can get the TTC RPU TX counter
+ *    value. Kick IPI to notify the remote it has read the TTC counter.
+ *    Repeat for different package size.
+ * 8. Cleanup resource:
+ *    disable IPI interrupt and deregister the IPI interrupt handler.
+ *
+ * Here is the Shared memory structure of this demo:
+ * |0x0      - 0x03     | number of APU to RPU buffers available to RPU |
+ * |0x04     - 0x1FFFFF | address array for shared buffers from APU to RPU |
+ * |0x200000 - 0x200003 | number of RPU to APU buffers available to APU |
+ * |0x200004 - 0x3FFFFF | address array for shared buffers from RPU to APU |
+ * |0x400000 - 0x7FFFFF | APU to RPU buffers |
+ * |0x800000 - 0xAFFFFF | RPU to APU buffers |
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "common.h"
+
+#define TTC_CNT_APU_TO_RPU 2 /* APU to RPU TTC counter ID */
+#define TTC_CNT_RPU_TO_APU 3 /* RPU to APU TTC counter ID */
+
+#define TTC_CLK_FREQ_HZ 100000000
+#define NS_PER_SEC 1000000000
+
+/* Shared memory offsets */
+#define SHM_DESC_OFFSET_TX 0x0
+#define SHM_BUFF_OFFSET_TX 0x400000
+#define SHM_DESC_OFFSET_RX 0x200000
+#define SHM_BUFF_OFFSET_RX 0x800000
+
+/* Shared memory descriptors offset */
+#define SHM_DESC_AVAIL_OFFSET 0x00
+#define SHM_DESC_ADDR_ARRAY_OFFSET 0x04
+
+#define ITERATIONS 1000
+
+#define BUF_SIZE_MAX 4096
+#define PKG_SIZE_MAX 1024
+#define PKG_SIZE_MIN 16
+#define TOTAL_DATA_SIZE (1024 * 4096)
+
+struct channel_s {
+	struct metal_device *shm_dev; /* Shared memory metal device */
+	struct metal_io_region *shm_io; /* Shared memory metal i/o region */
+	struct metal_device *ttc_dev; /* TTC metal device */
+	struct metal_io_region *ttc_io; /* TTC metal i/o region */
+	atomic_flag remote_nkicked; /* 0 - kicked from remote */
+};
+
+/**
+ * @brief read_timer() - return TTC counter value
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter ID
+ */
+static inline uint32_t read_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	unsigned long offset = XTTCPS_CNT_VAL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	return metal_io_read32(ttc_io, offset);
+}
+
+/**
+ * @brief reset_timer() - function to reset TTC counter
+ *        Set the RST bit in the Count Control Reg.
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter id
+ */
+static inline void reset_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	uint32_t val;
+	unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	val = XTTCPS_CNT_CNTRL_RST_MASK;
+	metal_io_write32(ttc_io, offset, val);
+}
+
+/**
+ * @brief stop_timer() - function to stop TTC counter
+ *        Set the disable bit in the Count Control Reg.
+ *
+ * @param[in] ttc_io - TTC timer i/o region
+ * @param[in] cnt_id - counter id
+ */
+static inline void stop_timer(struct metal_io_region *ttc_io,
+				unsigned long cnt_id)
+{
+	uint32_t val;
+	unsigned long offset = XTTCPS_CNT_CNTRL_OFFSET +
+				XTTCPS_CNT_OFFSET(cnt_id);
+
+	val = XTTCPS_CNT_CNTRL_DIS_MASK;
+	metal_io_write32(ttc_io, offset, val);
+}
+
+/**
+ * @brief ipi_irq_handler() - IPI interrupt handler
+ *        It clears the remote_nkicked flag of the channel to mark that
+ *        an IPI kick from the remote has been received. It does not
+ *        touch the TTC counters.
+ *
+ * @param[in] vect_id - IPI interrupt vector ID (unused)
+ * @param[in/out] priv - communication channel data for this application.
+ *
+ * @return - METAL_IRQ_HANDLED if the channel data is available. It
+ *           returns METAL_IRQ_NOT_HANDLED if no channel data (NULL priv)
+ *           was registered with the handler.
+ *
+ */
+static int ipi_irq_handler (int vect_id, void *priv)
+{
+	struct channel_s *ch = (struct channel_s *)priv;
+
+	(void)vect_id;
+
+	if (ch) {
+		atomic_flag_clear(&ch->remote_nkicked);
+		return METAL_IRQ_HANDLED;
+	}
+	return METAL_IRQ_NOT_HANDLED;
+}
+
+/**
+ * @brief measure_shmem_throughput() - Show throughput of using shared memory.
+ *        - Upload throughput measurement:
+ *          Start TTC APU counter, write data to shared memory and kick IPI to
+ *          notify remote. It will iterate until TOTAL_DATA_SIZE bytes have
+ *          been sent, stop TTC APU counter. Wait for RPU IPI kick to know RPU
+ *          has finished receiving packages and RPU RX counter is ready to
+ *          read. Read the APU TX and RPU RX counter values and save them.
+ *          Repeat for different package sizes. After this measurement, kick
+ *          IPI to notify the remote, the measurement has finished.
+ *        - Download throughput measurement:
+ *          Wait for IPI kick, start TTC APU counter, check if data is
+ *          available, if yes, read as much data as possible from shared
+ *          memory. It will iterate until TOTAL_DATA_SIZE bytes have been
+ *          received, stop TTC APU counter. Wait for RPU IPI kick so that APU
+ *          can get the TTC RPU TX counter value. Kick IPI to notify the remote
+ *          it has read the TTC counter. Repeat for different package size.
+ *
+ * @param[in] ch - channel information, which contains the shared memory
+ *                 i/o region and the ttc timer i/o region.
+ * @return - 0 on success, error code if failure.
+ */
+static int measure_shmem_throughput(struct channel_s* ch)
+{
+	void *lbuf = NULL;
+	int ret = 0;
+	size_t s, i;
+	uint32_t rx_count, rx_avail, tx_count, iterations;
+	unsigned long tx_avail_offset, rx_avail_offset;
+	unsigned long tx_addr_offset, rx_addr_offset;
+	unsigned long tx_data_offset, rx_data_offset;
+	uint32_t buf_phy_addr_32;
+	uint32_t *apu_tx_count = NULL;
+	uint32_t *apu_rx_count = NULL;
+	uint32_t *rpu_tx_count = NULL;
+	uint32_t *rpu_rx_count = NULL;
+
+	/* allocate memory for receiving data */
+	lbuf = metal_allocate_memory(BUF_SIZE_MAX);
+	if (!lbuf) {
+		LPERROR("Failed to allocate memory.\r\n");
+		return -ENOMEM;
+	}
+	memset(lbuf, 0xA, BUF_SIZE_MAX);
+
+	/* count the package sizes to size the counter value arrays */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<=1, i++);
+	apu_tx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	apu_rx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	rpu_tx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	rpu_rx_count = metal_allocate_memory(i * sizeof(uint32_t));
+	if (!apu_tx_count || !apu_rx_count || !rpu_tx_count || !rpu_rx_count) {
+		LPERROR("Failed to allocate memory.\r\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Clear shared memory */
+	metal_io_block_set(ch->shm_io, 0, 0, metal_io_region_size(ch->shm_io));
+
+	LPRINTF("Starting shared mem throughput demo\n");
+
+	/* for each data size, measure send throughput */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) {
+		tx_count = 0;
+		iterations = TOTAL_DATA_SIZE / s;
+		/* Set tx buffer address offset */
+		tx_avail_offset = SHM_DESC_OFFSET_TX + SHM_DESC_AVAIL_OFFSET;
+		tx_addr_offset = SHM_DESC_OFFSET_TX +
+				SHM_DESC_ADDR_ARRAY_OFFSET;
+		tx_data_offset = SHM_DESC_OFFSET_TX + SHM_BUFF_OFFSET_TX;
+		/* Reset APU TTC counter */
+		reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		while (tx_count < iterations) {
+			/* Write data to the shared memory*/
+			metal_io_block_write(ch->shm_io, tx_data_offset,
+					lbuf, s);
+
+			/* Write to the address array to tell the other end
+			 * the buffer address.
+			 */
+			buf_phy_addr_32 = (uint32_t)metal_io_phys(ch->shm_io,
+						tx_data_offset);
+			metal_io_write32(ch->shm_io, tx_addr_offset,
+					buf_phy_addr_32);
+			tx_data_offset += s;
+			tx_addr_offset += sizeof(buf_phy_addr_32);
+
+			/* Increase number of available buffers */
+			tx_count++;
+			metal_io_write32(ch->shm_io, tx_avail_offset,
+					tx_count);
+			/* Kick IPI to notify RPU data is ready in
+			 * the shared memory */
+			kick_ipi(NULL);
+		}
+		/* Stop APU TTC counter */
+		stop_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		/* Wait for RPU to signal RPU RX TTC counter is ready to
+		 * read */
+		wait_for_notified(&ch->remote_nkicked);
+		/* Read TTC counter values */
+		apu_tx_count[i] = read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		rpu_rx_count[i] = read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU);
+	}
+
+	/* Kick IPI to notify RPU that APU has read the RPU RX TTC counter
+	 * value */
+	kick_ipi(NULL);
+
+	/* for each data size, measure block read throughput */
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) {
+		rx_count = 0;
+		iterations = TOTAL_DATA_SIZE / s;
+		/* Set rx buffer address offset */
+		rx_avail_offset = SHM_DESC_OFFSET_RX + SHM_DESC_AVAIL_OFFSET;
+		rx_addr_offset = SHM_DESC_OFFSET_RX +
+				SHM_DESC_ADDR_ARRAY_OFFSET;
+		rx_data_offset = SHM_DESC_OFFSET_RX + SHM_BUFF_OFFSET_RX;
+
+		wait_for_notified(&ch->remote_nkicked);
+		/* Data has arrived, measure start. Reset APU TTC counter */
+		reset_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		while (1) {
+			rx_avail = metal_io_read32(ch->shm_io, rx_avail_offset);
+			while(rx_count != rx_avail) {
+				/* Get the buffer location from the shared
+				 * memory rx address array.
+				 */
+				buf_phy_addr_32 = metal_io_read32(ch->shm_io,
+							rx_addr_offset);
+				rx_data_offset = metal_io_phys_to_offset(
+					ch->shm_io,
+					(metal_phys_addr_t)buf_phy_addr_32);
+				if (rx_data_offset == METAL_BAD_OFFSET) {
+					LPERROR(
+			"[%u]failed to get rx offset: 0x%x, 0x%lx.\n",
+						rx_count, buf_phy_addr_32,
+						metal_io_phys(ch->shm_io,
+							rx_addr_offset));
+					ret = -EINVAL;
+					goto out;
+				}
+				rx_addr_offset += sizeof(buf_phy_addr_32);
+				/* Read data from shared memory */
+				metal_io_block_read(ch->shm_io, rx_data_offset,
+						lbuf, s);
+				rx_count++;
+			}
+			if (rx_count < iterations)
+				/* Need to wait for more data */
+				wait_for_notified(&ch->remote_nkicked);
+			else
+				break;
+		}
+		/* Stop APU TTC counter */
+		stop_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		/* Re-arm remote kicked flag: set means not kicked yet */
+		atomic_flag_clear(&ch->remote_nkicked);
+		atomic_flag_test_and_set(&ch->remote_nkicked);
+		/* Kick IPI to notify remote it is ready to read data */
+		kick_ipi(NULL);
+		/* Wait for RPU to signal RPU TX TTC counter is ready to
+		 * read */
+		wait_for_notified(&ch->remote_nkicked);
+		/* Read TTC counter values */
+		apu_rx_count[i] = read_timer(ch->ttc_io, TTC_CNT_APU_TO_RPU);
+		rpu_tx_count[i] = read_timer(ch->ttc_io, TTC_CNT_RPU_TO_APU);
+		/* Kick IPI to notify RPU APU has read the RPU TX TTC counter
+		 * value */
+		kick_ipi(NULL);
+	}
+
+	/* Print the measurement result */
+	float mbs = TTC_CLK_FREQ_HZ * (TOTAL_DATA_SIZE / MB);
+	for (s = PKG_SIZE_MIN, i = 0; s <= PKG_SIZE_MAX; s <<= 1, i++) {
+		LPRINTF("Shared memory throughput of pkg size %zu : \n", s);
+		LPRINTF(" APU send: %u, %.1f MB/s\n", apu_tx_count[i],
+			mbs / apu_tx_count[i]);
+		LPRINTF(" RPU receive: %u, %.1f MB/s\n", rpu_rx_count[i],
+			mbs / rpu_rx_count[i]);
+		LPRINTF(" RPU send: %u, %.1f MB/s\n", rpu_tx_count[i],
+			mbs / rpu_tx_count[i]);
+		LPRINTF(" APU receive: %u, %.1f MB/s\n", apu_rx_count[i],
+			mbs / apu_rx_count[i]);
+	}
+
+	LPRINTF("Finished shared memory throughput\n");
+
+out:
+	if (lbuf)
+		metal_free_memory(lbuf);
+	if (apu_tx_count)
+		metal_free_memory(apu_tx_count);
+	if (apu_rx_count)
+		metal_free_memory(apu_rx_count);
+	if (rpu_tx_count)
+		metal_free_memory(rpu_tx_count);
+	if (rpu_rx_count)
+		metal_free_memory(rpu_rx_count);
+	return ret;
+}
+
+int shmem_throughput_demo()
+{
+	struct metal_device *dev;
+	struct metal_io_region *io;
+	struct channel_s ch;
+	int ret = 0;
+
+	print_demo("shared memory throughput");
+	memset(&ch, 0, sizeof(ch));
+
+	/* Open shared memory device; record it at once so "out" closes it */
+	ret = metal_device_open(BUS_NAME, SHM_DEV_NAME, &dev);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", SHM_DEV_NAME);
+		goto out;
+	}
+	ch.shm_dev = dev;
+
+	/* Get shared memory device IO region */
+	io = metal_device_io_region(dev, 0);
+	if (!io) {
+		LPERROR("Failed to map io region for %s.\n", dev->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	ch.shm_io = io;
+
+	/* Open TTC device; record it at once so "out" closes it */
+	ret = metal_device_open(BUS_NAME, TTC_DEV_NAME, &dev);
+	if (ret) {
+		LPERROR("Failed to open device %s.\n", TTC_DEV_NAME);
+		goto out;
+	}
+	ch.ttc_dev = dev;
+
+	/* Get TTC IO region */
+	io = metal_device_io_region(dev, 0);
+	if (!io) {
+		LPERROR("Failed to map io region for %s.\n", dev->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	ch.ttc_io = io;
+
+	/* initialize remote_nkicked */
+	ch.remote_nkicked = (atomic_flag)ATOMIC_FLAG_INIT;
+	atomic_flag_test_and_set(&ch.remote_nkicked);
+
+	ret = init_ipi();
+	if (ret) {
+		goto out;
+	}
+	ipi_kick_register_handler(ipi_irq_handler, &ch);
+	enable_ipi_kick();
+
+	/* Run shared memory throughput measurement */
+	ret = measure_shmem_throughput(&ch);
+
+	/* disable IPI interrupt */
+	disable_ipi_kick();
+	deinit_ipi();
+
+out:
+	if (ch.ttc_dev)
+		metal_device_close(ch.ttc_dev);
+	if (ch.shm_dev)
+		metal_device_close(ch.shm_dev);
+	return ret;
+
+}
+
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c
new file mode 100644
index 00000000..5b4e555f
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.c
@@ -0,0 +1,31 @@
+
+/*
+ * Copyright (c) 2016, Xilinx Inc. and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include
+#include
+#include
+#include "common.h"
+
+int sys_init(void)
+{
+	struct metal_init_params init_param = METAL_INIT_DEFAULTS;
+	int ret;
+
+	ret = metal_init(&init_param);
+	if (ret)
+		LPERROR("Failed to initialize libmetal\n");
+	return ret;
+}
+
+void sys_cleanup(void)
+{
+	metal_finish();
+}
+
+void wait_for_interrupt(void) {
+	return;
+}
diff --git a/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h
new file mode 100644
index 00000000..67d4cfb6
--- /dev/null
+++ b/examples/system/linux/zynqmp/zynqmp_amp_demo/sys_init.h
@@ -0,0 +1,16 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2017 Xilinx, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ ******************************************************************************/
+
+#ifndef __SYS_INIT_H__
+#define __SYS_INIT_H__
+
+int sys_init(void);
+void sys_cleanup(void);
+
+#endif /* __SYS_INIT_H__ */
+
diff --git a/lib/log.h b/lib/log.h
index 464624c0..a5e06f12 100644
--- a/lib/log.h
+++ b/lib/log.h
@@ -105,6 +105,17 @@ void metal_default_log_handler(enum metal_log_level level,
 #define metal_info(fmt, args...) metal_log(METAL_LOG_INFO, fmt, ##args)
 #define metal_dbg(fmt, args...) metal_log(METAL_LOG_DEBUG, fmt, ##args)
 
+/**
+ * Convenience macros ML_ERR, ML_INFO, ML_DBG add source
+ * function name and the line number before the message.
+ * Inspired by pr_err, pr_info, etc. in the kernel's printk.h.
+ * Keep the original ML_ERR, ML_INFO, ML_DBG until the open-amp
+ * code is converted to use the new metal_*() macros.
+ */
+#define ML_ERR(fmt, args...) metal_err(fmt, ##args)
+#define ML_INFO(fmt, args...) metal_info(fmt, ##args)
+#define ML_DBG(fmt, args...) metal_dbg(fmt, ##args)
+
 /** @} */
 
 #ifdef __cplusplus
diff --git a/lib/system/generic/xlnx/CMakeLists.txt b/lib/system/generic/xlnx/CMakeLists.txt
index af02c7e1..8fe293f0 100644
--- a/lib/system/generic/xlnx/CMakeLists.txt
+++ b/lib/system/generic/xlnx/CMakeLists.txt
@@ -8,8 +8,6 @@
 collect (PROJECT_LIB_HEADERS sys.h)
 collect (PROJECT_LIB_SOURCES irq.c)
 
-include(CheckSymbolExists)
-check_symbol_exists(SDT "bspconfig.h" HAS_SYSTEM_DT)
-if (HAS_SYSTEM_DT)
+if (CMAKE_C_FLAGS MATCHES "SDT")
   collect (PROJECT_LIB_HEADERS sys_devicetree.h)
-endif()
+endif (CMAKE_C_FLAGS MATCHES "SDT")
diff --git a/test/system/linux/CMakeLists.txt b/test/system/linux/CMakeLists.txt
index 1e259d8b..10c866d2 100644
--- a/test/system/linux/CMakeLists.txt
+++ b/test/system/linux/CMakeLists.txt
@@ -7,6 +7,7 @@ collect (PROJECT_LIB_TESTS threads.c)
 collect (PROJECT_LIB_TESTS spinlock.c)
 collect (PROJECT_LIB_TESTS alloc.c)
 collect (PROJECT_LIB_TESTS irq.c)
+collect (PROJECT_LIB_TESTS io.c)
 
 if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_MACHINE})
   add_subdirectory(${PROJECT_MACHINE})
diff --git a/test/system/linux/io.c b/test/system/linux/io.c
new file mode 100644
index 00000000..667f8a66
--- /dev/null
+++ b/test/system/linux/io.c
@@ -0,0 +1,63 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "metal-test.h"
+
+/*
+ test snippet in device tree amba subnode:
+   shm0: shm@0 {
+	compatible = "shm_uio";
+	reg = <0x0 0x3ed80000 0x0 0x1000
+	       0x0 0x3fd80000 0x0 0x1000>;
+   };
+*/
+
+#define OFFSET 0x10
+#define SHM_DEV "3ed80000.shm"
+#define BUS "platform"
+#define NUM_REGIONS 2
+#define TESTVAL 0xfeedbeef
+
+
+/* Test for linux device having multiple, accessible IO regions */
+static int io_test(void) {
+	int ret = 0, i;
+	struct metal_device *dev;
+	struct metal_io_region *io[NUM_REGIONS];
+
+	ret = metal_device_open(BUS, SHM_DEV, &dev);
+	if (ret) {
+		printf("Failed to open %s/%s: %d\n", BUS, SHM_DEV, ret);
+		goto done;
+	}
+
+	for (i = 0; i < NUM_REGIONS; ++i) {
+		io[i] = metal_device_io_region(dev, i);
+		if (io[i] == NULL) {
+			printf("Failed to map IO region (%d)\n", i);
+			ret = -EINVAL;
+			goto cleanup;
+		}
+
+		/* write in some test value that differs between each region */
+		metal_io_write32(io[i], OFFSET, TESTVAL + i);
+	}
+
+	for (i = 0; i < NUM_REGIONS; ++i) {
+		if (metal_io_read32(io[i], OFFSET) != (long unsigned int)(TESTVAL + i)) {
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+cleanup:
+	metal_device_close(dev);
+
+done:
+	return ret;
+}
+METAL_ADD_TEST(io_test);