Skip to content

DAOS-17127 mercury: Add address to "already present" log. #128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
mercury (2.4.0-4) unstable; urgency=medium
[ Joseph Moore ]
* Add patch file for extra data log entry and add address to "already present" log

-- Joseph Moore <[email protected]> Tue, 11 Mar 2025 10:00:00 -0600

mercury (2.4.0-3) unstable; urgency=medium
[ Joseph Moore ]
* Add patch to na_ucx.c to flush ep prior to close

-- Joseph Moore <[email protected]> Wed, 15 Jan 2025 10:00:00 -0600

mercury (2.4.0-2) unstable; urgency=medium
[ Joseph Moore ]
Expand Down
26 changes: 26 additions & 0 deletions extra_data.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
diff --git a/src/mercury.c b/src/mercury.c
index 0bcddd8..c9b4641 100644
--- a/src/mercury.c
+++ b/src/mercury.c
@@ -772,6 +772,10 @@ hg_set_struct(struct hg_private_handle *hg_handle,
HG_CHECK_SUBSYS_ERROR(rpc, hg_proc_get_extra_buf(proc), error, ret,
HG_OVERFLOW, "Extra bulk handle could not fit into buffer");

+ HG_LOG_SUBSYS_WARNING(perf,
+ "Buffer allocated for send of extra input data, payload size %ld",
+ hg_proc_get_size_used(proc) + header_offset);
+
*more_data = true;
}

@@ -939,7 +943,9 @@ hg_get_extra_payload(struct hg_private_handle *hg_handle, hg_op_t op,
*extra_buf = hg_mem_aligned_alloc(page_size, *extra_buf_size);
HG_CHECK_SUBSYS_ERROR(rpc, *extra_buf == NULL, done, ret, HG_NOMEM,
"Could not allocate extra payload buffer");
-
+ HG_LOG_SUBSYS_WARNING(perf,
+ "Buffer allocated for extra input data, payload size %ld, handle %p",
+ *extra_buf_size, (void *) hg_handle->handle.core_handle);
ret = HG_Bulk_create(hg_handle->handle.info.hg_class, 1, extra_buf,
extra_buf_size, HG_BULK_READWRITE, &local_handle);
HG_CHECK_SUBSYS_HG_ERROR(rpc, done, ret, "Could not create HG bulk handle");
9 changes: 8 additions & 1 deletion mercury.spec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Name: mercury
Version: 2.4.0
Release: 2%{?dist}
Release: 4%{?dist}

# --without ucx build switch
%bcond_without ucx
Expand All @@ -16,6 +16,7 @@ Group: Development/Libraries
URL: http://mercury-hpc.github.io/
Source0: https://github.com/mercury-hpc/%{name}/releases/download/v%{dl_version}/%{name}-%{dl_version}.tar.bz2
Patch0: na_ucx.patch
Patch1: na_ucx_ep_flush.patch

BuildRequires: libfabric-devel >= 1.20
BuildRequires: cmake
Expand Down Expand Up @@ -131,6 +132,12 @@ Mercury plugin to support the UCX transport.
%{_libdir}/cmake/

%changelog
* Tue Mar 11 2025 Joseph Moore <[email protected]> - 2.4.0-4
- Change to addr_release for "already present" warning.

* Wed Jan 15 2025 Joseph Moore <[email protected]> - 2.4.0-3
- Add patch to na_ucx.c to flush end point prior to close.

* Tue Jan 07 2025 Joseph Moore <[email protected]> - 2.4.0-2
- Enable debug RPMs for Leap sub-packages.

Expand Down
2 changes: 1 addition & 1 deletion na_ucx.patch
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ index 84eb8b0..e4b6676 100644
+ if (na_ucx_addr != NULL) {
+ NA_LOG_SUBSYS_WARNING(addr,
+ "An entry is already present for this address");
+ na_ucx_addr_map_remove(&na_ucx_class->addr_map, na_ucx_addr);
+ na_ucx_addr_release(na_ucx_addr);
+ }

/* Insert new entry and create new address */
Expand Down
64 changes: 64 additions & 0 deletions na_ucx_ep_flush.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c
index 6e9c3b0..2f157da 100644
--- a/src/na/na_ucx.c
+++ b/src/na/na_ucx.c
@@ -441,6 +441,12 @@ na_ucp_ep_create(ucp_worker_h worker, ucp_ep_params_t *ep_params,
static void
na_ucp_ep_error_cb(void *arg, ucp_ep_h ep, ucs_status_t status);

+/**
+ * Flush endpoint.
+ */
+static ucs_status_ptr_t
+na_ucp_ep_flush(ucp_ep_h ep);
+
/**
* Close endpoint.
*/
@@ -1940,6 +1946,21 @@ na_ucp_ep_error_cb(
na_ucx_addr_ref_decr(na_ucx_addr);
}

+/*---------------------------------------------------------------------------*/
+static ucs_status_ptr_t
+na_ucp_ep_flush(ucp_ep_h ep)
+{
+ const ucp_request_param_t flush_params = {
+ .op_attr_mask = 0};
+ ucs_status_ptr_t status_ptr = ucp_ep_flush_nbx(ep, &flush_params);
+
+ NA_CHECK_SUBSYS_ERROR_DONE(addr,
+ status_ptr != NULL && UCS_PTR_IS_ERR(status_ptr),
+ "ucp_ep_flush_nb() failed (%s)",
+ ucs_status_string(UCS_PTR_STATUS(status_ptr)));
+ return status_ptr;
+}
+
/*---------------------------------------------------------------------------*/
static void
na_ucp_ep_close(ucp_ep_h ep)
@@ -2859,8 +2880,23 @@ na_ucx_addr_release(struct na_ucx_addr *na_ucx_addr)
if (na_ucx_addr->ucp_ep != NULL) {
/* NB. for deserialized addresses that are not "connected" addresses, do
* not close the EP */
- if (na_ucx_addr->worker_addr == NULL)
+ if (na_ucx_addr->worker_addr == NULL) {
+ if (!na_ucx_addr->na_ucx_class->ucp_listener) {
+ ucs_status_ptr_t status_ptr = na_ucp_ep_flush(na_ucx_addr->ucp_ep);
+
+ if (UCS_PTR_IS_PTR(status_ptr)) {
+ ucs_status_t status;
+
+ do {
+ ucp_worker_progress(na_ucx_addr->na_ucx_class->ucp_worker);
+ status = ucp_request_check_status(status_ptr);
+ } while (status == UCS_INPROGRESS);
+ ucp_request_free(status_ptr);
+ }
+ }
+
na_ucp_ep_close(na_ucx_addr->ucp_ep);
+ }
na_ucx_addr->ucp_ep = NULL;
}

3 changes: 2 additions & 1 deletion packaging/Dockerfile.coverity
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#
# Copyright 2018-2020, Intel Corporation
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# 'recipe' for Docker to build for a Coverity scan.
#

# Pull base image
FROM fedora:latest
MAINTAINER daos-stack <[email protected]>
LABEL maintainer="daos-stack <[email protected]>"

# use same UID as host and default value of 1000 if not specified
ARG UID=1000
Expand Down
26 changes: 15 additions & 11 deletions packaging/Dockerfile.mockbuild
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#
# Copyright 2018-2024 Intel Corporation
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# 'recipe' for Docker to build an RPM
#
Expand All @@ -13,15 +14,16 @@ LABEL maintainer="[email protected]"

# Use local repo server if present
ARG REPO_FILE_URL
RUN if [ -n "$REPO_FILE_URL" ]; then \
cd /etc/yum.repos.d/ && \
curl -k -f -o daos_ci-fedora-artifactory.repo.tmp \
"$REPO_FILE_URL"daos_ci-fedora-artifactory.repo && \
for file in *.repo; do \
true > $file; \
done; \
mv daos_ci-fedora-artifactory.repo{.tmp,}; \
fi
ARG DAOS_LAB_CA_FILE_URL
ARG REPOSITORY_NAME
# script to install OS updates basic tools and daos dependencies
# COPY ./utils/scripts/install-fedora.sh /tmp/install.sh
# script to setup local repo if available
COPY ./packaging/scripts/repo-helper-fedora.sh /tmp/repo-helper.sh

RUN chmod +x /tmp/repo-helper.sh && \
/tmp/repo-helper.sh && \
rm -f /tmp/repo-helper.sh

# Install basic tools
RUN dnf -y install mock make \
Expand All @@ -33,13 +35,15 @@ RUN dnf -y install mock make \
ARG UID=1000

# Add build user (to keep rpmbuild happy)
ENV USER build
ENV PASSWD build
ENV USER=build
ENV PASSWD=build
# add the user to the mock group so it can run mock
RUN if [ $UID != 0 ]; then \
useradd -u $UID -ms /bin/bash $USER; \
echo "$USER:$PASSWD" | chpasswd; \
usermod -a -G mock $USER; \
mkdir -p /var/cache/mock; \
chown $USER:root /var/cache/mock; \
fi

ARG CB0
Expand Down
54 changes: 21 additions & 33 deletions packaging/Dockerfile.ubuntu.20.04
Original file line number Diff line number Diff line change
@@ -1,62 +1,50 @@
# Keep Dockerfile.ubuntu the same as this file until all packaging
# jobs are fixed to have a Dockerfile.ubuntu, and then the common
# Jenkinsfile will be changed to use Dockerfile.ubuntu.
#
# Copyright 2019-2021, Intel Corporation
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# 'recipe' for Docker to build a Debian package
#
# Pull base image
FROM ubuntu:20.04
ARG BASE_DISTRO=ubuntu:20.04
FROM $BASE_DISTRO
LABEL org.opencontainers.image.authors="[email protected]"

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
curl gpg
# Needed for later use of BASE_DISTRO
ARG BASE_DISTRO

ARG REPO_FILE_URL
RUN if [ -n "$REPO_FILE_URL" ]; then \
cd /etc/apt/sources.list.d && \
curl -f -o daos_ci-ubuntu20.04-artifactory.list.tmp \
"$REPO_FILE_URL"daos_ci-ubuntu20.04-artifactory.list && \
true > ../sources.list && \
mv daos_ci-ubuntu20.04-artifactory.list.tmp \
daos_ci-ubuntu20.04-artifactory.list; \
url="${REPO_FILE_URL%/*/}/hpe-ilorest-ubuntu-bionic-proxy/"; \
else \
url="https://downloads.linux.hpe.com/SDR/repo/ilorest/"; \
fi; \
cd -; \
mkdir -p /usr/local/share/keyrings/; \
curl -f -O "$url"GPG-KEY-hprest; \
gpg --no-default-keyring --keyring ./temp-keyring.gpg \
--import GPG-KEY-hprest; \
gpg --no-default-keyring --keyring ./temp-keyring.gpg --export \
--output /usr/local/share/keyrings/hpe-sdr-public.gpg; \
rm ./temp-keyring.gpg; \
curl -f -O "$REPO_FILE_URL"esad_repo.key; \
gpg --no-default-keyring --keyring ./temp-keyring.gpg \
--import esad_repo.key; \
gpg --no-default-keyring --keyring ./temp-keyring.gpg --export \
--output /usr/local/share/keyrings/daos-stack-public.gpg
ARG DAOS_LAB_CA_FILE_URL
ARG REPOSITORY_NAME
# script to setup local repo if available
COPY ./packaging/scripts/repo-helper-ubuntu.sh /tmp/repo-helper.sh

RUN chmod +x /tmp/repo-helper.sh && \
/tmp/repo-helper.sh && \
rm -f /tmp/repo-helper.sh

# Install basic tools
# Install basic tools - rpmdevtools temporarily commented out.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
autoconf bash ca-certificates curl debhelper dh-make \
dpkg-dev dh-python doxygen gcc git git-buildpackage \
javahelper locales make patch pbuilder pkg-config \
python3-dev python3-distro python3-distutils rpm scons wget \
cmake valgrind rpmdevtools
python3-dev python3-distro python3-distutils rpm scons sudo \
wget cmake valgrind # rpmdevtools

# use same UID as host and default value of 1000 if not specified
ARG UID=1000

# Add build user (to keep chrootbuild happy)
ENV USER build
ENV USER=build
RUN useradd -u $UID -ms /bin/bash $USER

# need to run the build command as root, as it needs to chroot
RUN if ! grep "^#includedir /etc/sudoers.d" /etc/sudoers; then \
echo "#includedir /etc/sudoers.d" >> /etc/sudoers; \
fi; \
echo "Defaults env_keep += \"DPKG_GENSYMBOLS_CHECK_LEVEL\"" > /etc/sudoers.d/build; \
echo "build ALL=(ALL) NOPASSWD: /usr/bin/tee /root/.pbuilderrc" >> /etc/sudoers.d/build; \
echo "build ALL=(ALL) NOPASSWD: /usr/bin/tee /root/.pbuilderrc" >> /etc/sudoers.d/build; \
echo "build ALL=(ALL) NOPASSWD: /usr/sbin/pbuilder" >> /etc/sudoers.d/build; \
chmod 0440 /etc/sudoers.d/build; \
visudo -c; \
Expand Down
3 changes: 2 additions & 1 deletion packaging/Dockerfile.ubuntu.rolling
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#
# Copyright 2019, Intel Corporation
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# 'recipe' for Docker to build a Debian package
#
# Pull base image
FROM ubuntu:rolling
Maintainer daos-stack <daos@daos.groups.io>
LABEL org.opencontainers.image.authors="[email protected]"

# use same UID as host and default value of 1000 if not specified
ARG UID=1000
Expand Down
26 changes: 22 additions & 4 deletions packaging/rpm_chrootbuild
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

set -uex

: "${HTTPS_PROXY:=}"
: "${REPO_FILE_URL:=}"
: "${ARCH:=$(arch)}"
: "${REPOSITORY_NAME:=artifactory}"

cp /etc/mock/"$CHROOT_NAME".cfg mock.cfg

# Enable mock ccache plugin
Expand All @@ -11,7 +16,20 @@ config_opts['plugin_conf']['ccache_opts']['dir'] = "%(cache_topdir)s/%(root)s/cc
EOF


if [[ $CHROOT_NAME == *epel-8-x86_64 ]]; then
# Optionally add a proxy to mock
if [ -n "$HTTPS_PROXY" ];then
yum_proxy="http://${HTTPS_PROXY##*//}"
echo "config_opts['https_proxy'] = '$yum_proxy'" >> mock.cfg
fi

# No proxy for local mirrors
if [ -n "$REPO_FILE_URL" ]; then
direct="${REPO_FILE_URL##*//}"
direct="${direct%%/*}"
echo "config_opts['no_proxy'] = '${direct}'" >> mock.cfg
fi

if [[ $CHROOT_NAME == *"epel-8-${ARCH}" ]]; then
cat <<EOF >> mock.cfg
config_opts['module_setup_commands'] = [
('enable', 'javapackages-tools:201801'),
Expand All @@ -21,7 +39,7 @@ EOF
fi

# Use dnf on CentOS 7
if [[ $CHROOT_NAME == *epel-7-x86_64 ]]; then
if [[ $CHROOT_NAME == *"epel-7-$ARCH" ]]; then
MOCK_OPTIONS="--dnf --no-bootstrap-chroot${MOCK_OPTIONS:+ }$MOCK_OPTIONS"
fi

Expand Down Expand Up @@ -61,7 +79,7 @@ if [ -n "${ARTIFACTORY_URL:-}" ] && "$LOCAL_REPOS"; then
fi
fi
curl -sSf "$REPO_FILE_URL"daos_ci-"${CHROOT_NAME%-*}".repo >> mock.cfg
repo_adds+=("--enablerepo *-artifactory")
repo_adds+=("--enablerepo *-${REPOSITORY_NAME}")
fi
fi

Expand Down Expand Up @@ -127,7 +145,7 @@ if ! eval time mock -r mock.cfg ${repo_dels[*]} ${repo_adds[*]} --no-clean \
fi

# Save the ccache
if [ -d /scratch/ ]; then
if [ -d /scratch/mock ]; then
mkdir -p "$bs_dir"/
if ! flock "$bs_dir" -c "tar -czf $bs_dir/ccache-$CHROOT_NAME-$PACKAGE.tar.gz /var/cache/mock/${CHROOT_NAME}/ccache"; then
echo "Failed to save ccache. Plowing onward."
Expand Down
Loading