Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require hashes for all pip installs #1871

Merged
merged 18 commits into from
Oct 9, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ WORKDIR $WORKDIR
ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt
ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py

ARG BEAM_VERSION=${beamVersion}
ARG BEAM_PACKAGE=apache-beam[gcp]==$BEAM_VERSION
RUN if ! [ -f requirements.txt ] ; then >&2 echo "error: no requirements.txt file found" && exit 1 ; fi

RUN pip install $BEAM_PACKAGE
RUN pip install -U -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
# Install the pinned dependencies in hash-checking mode. `-r` takes the next
# argument as the requirements file, so it must directly precede the file path.
RUN pip install -U --require-hashes -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip download --require-hashes --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE

ENTRYPOINT ${entryPoint}
12 changes: 5 additions & 7 deletions plugins/core-plugin/src/main/resources/Dockerfile-template-xlang
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ FROM ${basePythonContainerImage} as python-base
# Build args
ARG WORKDIR=/template
ARG REQUIREMENTS_FILE=requirements.txt
ARG BEAM_VERSION=${beamVersion}
ARG BEAM_PACKAGE=apache-beam[dataframe,gcp,test,yaml]==$BEAM_VERSION
ARG PY_VERSION=${pythonVersion}

# Copy template files to /template
Expand All @@ -16,14 +14,14 @@ ${filesToCopy}
${directoriesToCopy}
WORKDIR $WORKDIR

# Create requirements.txt file if not provided
RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi
# Throw if requirements.txt file not provided
RUN if ! [ -f requirements.txt ] ; then >&2 echo "error: no requirements.txt file found" && exit 1 ; fi

# Install dependencies to launch the pipeline and download to reduce startup time
RUN python -m venv /venv \
&& /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \
&& /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \
&& /venv/bin/python -m ensurepip --upgrade \
&& /venv/bin/pip install --no-cache-dir --require-hashes -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip download --require-hashes --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \
&& rm -rf /usr/local/lib/python$PY_VERSION/site-packages \
&& cp -r /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/
# change to mv once Beam 2.57.0 is released
Expand Down
13 changes: 4 additions & 9 deletions plugins/core-plugin/src/main/resources/Dockerfile-template-yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ FROM ${basePythonContainerImage} as python-base
# Build args
ARG WORKDIR=/template
ARG REQUIREMENTS_FILE=requirements.txt
ARG BEAM_VERSION=${beamVersion}
ARG BEAM_PACKAGE=apache-beam[dataframe,gcp,test,yaml]==$BEAM_VERSION
ARG PY_VERSION=${pythonVersion}

# Copy template files to /template
Expand All @@ -16,18 +14,15 @@ ${filesToCopy}
${directoriesToCopy}
WORKDIR $WORKDIR

# Create requirements.txt file if not provided
RUN if ! [ -f requirements.txt ] ; then echo "$BEAM_PACKAGE" > requirements.txt ; fi
# Throw if requirements.txt file not provided
RUN if ! [ -f requirements.txt ] ; then >&2 echo "error: no requirements.txt file found" && exit 1 ; fi

# Install dependencies to launch the pipeline and download to reduce startup time
# Remove Jinja2 dependency once YAML templatization support is added to Beam
# TODO - remove `pip uninstall apache-beam` line when repo is upgraded to Beam 2.59.0
RUN python -m venv /venv \
&& /venv/bin/pip uninstall apache-beam -y \
&& /venv/bin/pip install --no-cache-dir --upgrade pip setuptools \
&& /venv/bin/pip install --no-cache-dir -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip install --no-cache-dir -U Jinja2 \
&& /venv/bin/pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \
&& /venv/bin/pip install --require-hashes --no-cache-dir -U -r $REQUIREMENTS_FILE \
&& /venv/bin/pip download --require-hashes --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $REQUIREMENTS_FILE \
&& rm -rf /usr/local/lib/python$PY_VERSION/site-packages \
&& mv /venv/lib/python$PY_VERSION/site-packages /usr/local/lib/python$PY_VERSION/

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ public void testGeneratePythonDockerfileDefaults() throws IOException, TemplateE
assertTrue(outputFile.exists());
String fileContents = Files.toString(outputFile, StandardCharsets.UTF_8);
assertThat(fileContents).contains("FROM " + BASE_PYTHON_CONTAINER_IMAGE);
assertThat(fileContents).contains("ARG BEAM_VERSION=beam_version");
assertThat(fileContents)
.contains("RUN pip install -U -r --require-hashes $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE");
assertThat(fileContents)
.contains(String.format("ENTRYPOINT [\"%s\"]", PYTHON_LAUNCHER_ENTRYPOINT));
}
Expand All @@ -73,7 +74,8 @@ public void testGeneratePythonDockerfile() throws IOException, TemplateException
assertTrue(outputFile.exists());
String fileContents = Files.toString(outputFile, StandardCharsets.UTF_8);
assertThat(fileContents).contains("FROM a python container image");
assertThat(fileContents).contains("ARG BEAM_VERSION=beam_version");
assertThat(fileContents)
.contains("RUN pip install -U -r --require-hashes $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE");
assertThat(fileContents).contains("COPY main.py requirements.txt* /$WORKDIR/");
assertThat(fileContents).contains("ENTRYPOINT [\"python/entry/point\"]");
}
Expand All @@ -99,7 +101,8 @@ public void testGenerateXLangDockerfileDefaults() throws IOException, TemplateEx
String fileContents = Files.toString(outputFile, StandardCharsets.UTF_8);
assertThat(fileContents).contains("FROM " + BASE_CONTAINER_IMAGE);
assertThat(fileContents).contains("FROM " + BASE_PYTHON_CONTAINER_IMAGE);
assertThat(fileContents).contains("BEAM_VERSION=beam_version");
assertThat(fileContents)
.contains("pip install --no-cache-dir --require-hashes -U -r $REQUIREMENTS_FILE");
assertThat(fileContents).contains("PY_VERSION=" + PYTHON_VERSION);
assertThat(fileContents).contains("ENV DATAFLOW_JAVA_COMMAND_SPEC=");
assertThat(fileContents)
Expand Down Expand Up @@ -141,7 +144,6 @@ public void testGenerateXLangDockerfile() throws IOException, TemplateException
.contains("COPY container-generated-metadata.json requirements.txt* /$WORKDIR/");
assertThat(fileContents).contains("COPY containerName/ /$WORKDIR/containerName/");
assertThat(fileContents).contains("COPY otherDirectory/ /$WORKDIR/otherDirectory/");
assertThat(fileContents).contains("=beam_version");
assertThat(fileContents).contains("=py_version");
assertThat(fileContents).contains("ENTRYPOINT [\"java/entry/point\"]");
assertThat(fileContents).contains("ENV DATAFLOW_JAVA_COMMAND_SPEC=command_spec");
Expand All @@ -160,7 +162,6 @@ public void testGenerateYamlDockerfileDefaults() throws IOException, TemplateExc
String fileContents = Files.toString(outputFile, StandardCharsets.UTF_8);
assertThat(fileContents).contains("FROM " + BASE_CONTAINER_IMAGE);
assertThat(fileContents).contains("FROM " + BASE_PYTHON_CONTAINER_IMAGE);
assertThat(fileContents).contains("BEAM_VERSION=beam_version");
assertThat(fileContents).contains("PY_VERSION=" + PYTHON_VERSION);
assertThat(fileContents)
.contains(String.format("ENTRYPOINT [\"%s\"]", PYTHON_LAUNCHER_ENTRYPOINT));
Expand All @@ -183,7 +184,6 @@ public void testGenerateYamlDockerfile() throws IOException, TemplateException {
String fileContents = Files.toString(outputFile, StandardCharsets.UTF_8);
assertThat(fileContents).contains("FROM a python container image");
assertThat(fileContents).contains("FROM a java container image");
assertThat(fileContents).contains("=beam_version");
assertThat(fileContents).contains("=py_version");
assertThat(fileContents)
.doesNotContainMatch(
Expand All @@ -209,7 +209,6 @@ public void testGenerateYamlDockerfileWithOtherFiles() throws IOException, Templ
String fileContents = Files.toString(outputFile, StandardCharsets.UTF_8);
assertThat(fileContents).contains("FROM a python container image");
assertThat(fileContents).contains("FROM a java container image");
assertThat(fileContents).contains("=beam_version");
assertThat(fileContents).contains("=py_version");
assertThat(fileContents).contains("COPY other_file /$WORKDIR/");
assertThat(fileContents).contains("ENTRYPOINT [\"python/entry/point\"]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -526,13 +527,23 @@ private void stageFlexJavaTemplate(
if (definition.getTemplateAnnotation().type() == TemplateType.XLANG) {
String dockerfileContainer = outputClassesDirectory.getPath() + "/" + containerName;
String dockerfilePath = dockerfileContainer + "/Dockerfile";
LOG.info("Generating dockerfile " + dockerfilePath);
String xlangCommandSpec = "/template/" + containerName + "/resources/" + commandSpecFileName;
File dockerfile = new File(dockerfilePath);
if (!dockerfile.exists()) {
Map<String, Set<String>> filesToCopy =
Map.of(
String.format("%s-generated-metadata.json", containerName),
Set.of("requirements.txt*"));
// Copy in requirements.txt if present
File sourceRequirements = new File(outputClassesDirectory.getPath() + "/requirements.txt");
File destRequirements = new File(dockerfileContainer + "/requirements.txt");
if (sourceRequirements.exists()) {
Files.copy(
sourceRequirements.toPath(),
destRequirements.toPath(),
StandardCopyOption.REPLACE_EXISTING);
}
Set<String> directoriesToCopy = Set.of(containerName);
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
Expand Down Expand Up @@ -699,10 +710,20 @@ private void stageFlexPythonTemplate(
String containerName,
String templatePath)
throws IOException, InterruptedException, TemplateException {

String dockerfilePath = outputClassesDirectory.getPath() + "/" + containerName + "/Dockerfile";
String dockerfileContainer = outputClassesDirectory.getPath() + "/" + containerName;
String dockerfilePath = dockerfileContainer + "/Dockerfile";
LOG.info("Generating dockerfile " + dockerfilePath);
File dockerfile = new File(dockerfilePath);
if (!dockerfile.exists()) {
// Copy in requirements.txt if present
File sourceRequirements = new File(outputClassesDirectory.getPath() + "/requirements.txt");
File destRequirements = new File(dockerfileContainer + "/requirements.txt");
if (sourceRequirements.exists()) {
Files.copy(
sourceRequirements.toPath(),
destRequirements.toPath(),
StandardCopyOption.REPLACE_EXISTING);
}
damccorm marked this conversation as resolved.
Show resolved Hide resolved
Map<String, Set<String>> filesToCopy = Map.of("main.py", Set.of("requirements.txt*"));
DockerfileGenerator.builder(
definition.getTemplateAnnotation().type(),
Expand Down Expand Up @@ -919,7 +940,7 @@ private void stageXlangUsingDockerfile(String imagePath, String containerName)
+ " - --cache-repo="
+ cacheFolder);
}

LOG.info("Submitting cloudbuild job with config: " + cloudbuildFile.getAbsolutePath());
Process stageProcess =
runCommand(
new String[] {
Expand Down
2 changes: 1 addition & 1 deletion python/src/main/python/streaming-llm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt
ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py

# Install dependencies to launch the pipeline
RUN pip install -U -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip install -U --require-hashes -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE

ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"]
2 changes: 1 addition & 1 deletion python/src/main/python/word-count-python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ ENV FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE=requirements.txt
ENV FLEX_TEMPLATE_PYTHON_PY_FILE=main.py

# Install dependencies to launch the pipeline
RUN pip install -U -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip install -U --require-hashes -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE
RUN pip download --no-cache-dir --dest /tmp/dataflow-requirements-cache -r $FLEX_TEMPLATE_PYTHON_REQUIREMENTS_FILE

ENTRYPOINT ["/opt/google/dataflow/python_template_launcher"]
Loading